1 /******************************************************************************
\r
3 * Copyright (c) 2019 Intel.
\r
5 * Licensed under the Apache License, Version 2.0 (the "License");
\r
6 * you may not use this file except in compliance with the License.
\r
7 * You may obtain a copy of the License at
\r
9 * http://www.apache.org/licenses/LICENSE-2.0
\r
11 * Unless required by applicable law or agreed to in writing, software
\r
12 * distributed under the License is distributed on an "AS IS" BASIS,
\r
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
14 * See the License for the specific language governing permissions and
\r
15 * limitations under the License.
\r
17 *******************************************************************************/
\r
26 #include <sys/syscall.h>
\r
28 #include <Windows.h>
\r
31 #include "common.hpp"
\r
37 /* Required to avoid linker errors */
\r
38 json KernelTests::conf;
\r
39 std::string KernelTests::test_type;
\r
40 unsigned long KernelTests::tsc;
\r
43 long BenchmarkParameters::repetition = 40;
\r
44 long BenchmarkParameters::loop = 30;
\r
45 unsigned BenchmarkParameters::cpu_id = CPU_ID;
\r
/**
 * Restricts the calling thread to a single CPU.
 *
 * @param cpu Index of the CPU the calling thread should be bound to.
 * @return The result of sched_setaffinity(): 0 on success, -1 on failure
 *         (errno is set by the call). On Windows builds no affinity is
 *         applied and -1 is returned.
 */
int bind_to_cpu(const unsigned cpu)
{
#ifndef _WIN32
    /* Use the kernel thread id so that only the calling thread is affected. */
    const auto thread_id = syscall(SYS_gettid);

    cpu_set_t mask;
    CPU_ZERO(&mask);      /* start from an empty set: only `cpu` may be selected */
    CPU_SET(cpu, &mask);

    return sched_setaffinity(static_cast<pid_t>(thread_id), sizeof(mask), &mask);
#else
    static_cast<void>(cpu);
    return -1;
#endif
}
\r
60 std::pair<double, double> calculate_statistics(const std::vector<long> values)
\r
62 const auto sum = std::accumulate(values.begin(), values.end(), 0L);
\r
64 const auto number_of_iterations = BenchmarkParameters::repetition *
\r
65 BenchmarkParameters::loop;
\r
67 const auto mean = sum / (double) number_of_iterations;
\r
69 auto stddev_accumulator = 0.0;
\r
70 for (auto v : values)
\r
71 stddev_accumulator = pow((v / BenchmarkParameters::loop) - mean, 2);
\r
73 const auto stddev = sqrt(stddev_accumulator / BenchmarkParameters::repetition);
\r
75 return {mean, stddev};
\r
/**
 * Builds the ascending sequence 0, 1, ..., number - 1.
 *
 * @param number Length of the sequence.
 * @return Vector with `number` elements; empty when `number` is 0.
 */
std::vector<unsigned> get_sequence(const unsigned number)
{
    std::vector<unsigned> sequence(number);
    std::iota(sequence.begin(), sequence.end(), 0U);

    return sequence;
}
\r
86 char* read_data_to_aligned_array(const std::string &filename)
\r
88 std::ifstream input_stream(filename, std::ios::binary);
\r
90 std::vector<char> buffer((std::istreambuf_iterator<char>(input_stream)),
\r
91 std::istreambuf_iterator<char>());
\r
93 if(buffer.size() == 0)
\r
94 throw reading_input_file_exception();
\r
96 auto aligned_buffer = aligned_malloc<char>((int) buffer.size(), 64);
\r
98 if(aligned_buffer == nullptr)
\r
99 throw std::runtime_error("Failed to allocate memory for the test vector!");
\r
101 std::copy(buffer.begin(), buffer.end(), aligned_buffer);
\r
103 return aligned_buffer;
\r
106 json read_json_from_file(const std::string &filename)
\r
110 std::ifstream json_stream(filename);
\r
111 if(!json_stream.is_open())
\r
112 throw missing_config_file_exception();
\r
114 json_stream >> result;
\r
119 unsigned long tsc_recovery()
\r
122 constexpr auto ns_per_sec = 1E9;
\r
124 struct timespec sleeptime = {.tv_nsec = __syscall_slong_t(5E8) };
\r
126 struct timespec t_start, t_end;
\r
128 if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0)
\r
130 unsigned long start = tsc_tick();
\r
132 nanosleep(&sleeptime,NULL);
\r
133 clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
\r
135 unsigned long end = tsc_tick();
\r
137 unsigned long ns = (unsigned long)((t_end.tv_sec - t_start.tv_sec) * ns_per_sec + t_end.tv_nsec - t_start.tv_nsec);
\r
139 double secs = (double) ns / ns_per_sec;
\r
141 unsigned long resolution_timer = (unsigned long)((end - start)/secs);
\r
142 unsigned long tick_per_usec = (resolution_timer / 1000000);
\r
144 std::cout << "[----------] System clock (rdtsc) resolution " << resolution_timer << " [Hz]" << std::endl;
\r
145 std::cout << "[----------] Ticks per us " << tick_per_usec << std::endl;
\r
147 return tick_per_usec;
\r
151 LARGE_INTEGER tick_per_sec;
\r
152 QueryPerformanceFrequency(&tick_per_sec);
\r
154 std::cout << "[----------] System clock (rdtsc) resolution unknown" << std::endl;
\r
155 std::cout << "[----------] Ticks per us " << (tick_per_sec.QuadPart / 1000000) << std::endl;
\r
156 return (unsigned long) tick_per_sec.QuadPart;
\r
/**
 * Reads the CPU timestamp counter.
 *
 * @return The current counter value on x86 builds with a GCC-compatible
 *         compiler; 0 on every other target, where the inline assembly below
 *         is not available.
 */
unsigned long tsc_tick()
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__))
    /* rdtsc delivers the counter as two 32-bit halves in EDX:EAX. */
    unsigned int hi = 0;
    unsigned int lo = 0;

    __asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));

    /* Widen before shifting: shifting a 32-bit value by 32 is undefined. */
    return static_cast<unsigned long>((static_cast<unsigned long long>(hi) << 32) | lo);
#else
    return 0;
#endif
}
\r
175 void KernelTests::print_and_store_results(const std::string &isa, const std::string ¶meters,
\r
176 const std::string &module_name, const std::string &test_name,
\r
177 const std::string &unit, const int para_factor,
\r
178 const double mean, const double stddev)
\r
180 std::cout << "[----------] " << "Mean" << " = " << std::fixed << mean << " us" << std::endl;
\r
181 std::cout << "[----------] " << "Stddev" << " = " << stddev << " us" << std::endl;
\r
184 /* Two properties below should uniquely identify a test case */
\r
185 RecordProperty("kernelname", module_name);
\r
186 RecordProperty("parameters", parameters);
\r
188 RecordProperty("isa", isa);
\r
189 RecordProperty("unit", unit);
\r
190 RecordProperty("parallelization_factor", para_factor);
\r
192 RecordProperty("mean", std::to_string(mean));
\r
193 RecordProperty("stddev", std::to_string(stddev));
\r