-/******************************************************************************\r
-*\r
-* Copyright (c) 2019 Intel.\r
-*\r
-* Licensed under the Apache License, Version 2.0 (the "License");\r
-* you may not use this file except in compliance with the License.\r
-* You may obtain a copy of the License at\r
-*\r
-* http://www.apache.org/licenses/LICENSE-2.0\r
-*\r
-* Unless required by applicable law or agreed to in writing, software\r
-* distributed under the License is distributed on an "AS IS" BASIS,\r
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
-* See the License for the specific language governing permissions and\r
-* limitations under the License.\r
-*\r
-*******************************************************************************/\r
-\r
-\r
-#include <cmath>\r
-#include <fstream>\r
-#include <numeric>\r
-\r
-#ifndef _WIN64\r
-#include <unistd.h>\r
-#include <sys/syscall.h>\r
-#else\r
-#include <Windows.h>\r
-#endif\r
-\r
-#include "common.hpp"\r
-\r
-#ifndef CPU_ID\r
-#define CPU_ID 4\r
-#endif\r
-\r
-/* Required to avoid linker errors */\r
-json KernelTests::conf;\r
-std::string KernelTests::test_type;\r
-unsigned long KernelTests::tsc;\r
-\r
-\r
-long BenchmarkParameters::repetition = 40;\r
-long BenchmarkParameters::loop = 30;\r
-unsigned BenchmarkParameters::cpu_id = CPU_ID;\r
-\r
-int bind_to_cpu(const unsigned cpu)\r
-{\r
-#ifndef _WIN64\r
- const auto pid = syscall(SYS_gettid);\r
- cpu_set_t mask {};\r
- CPU_ZERO(&mask);\r
- CPU_SET(cpu, &mask);\r
- return sched_setaffinity(__pid_t(pid), sizeof(mask), &mask);\r
-#else\r
- return -1;\r
-#endif\r
-}\r
-\r
-std::pair<double, double> calculate_statistics(const std::vector<long> values)\r
-{\r
- const auto sum = std::accumulate(values.begin(), values.end(), 0L);\r
-\r
- const auto number_of_iterations = BenchmarkParameters::repetition *\r
- BenchmarkParameters::loop;\r
-\r
- const auto mean = sum / (double) number_of_iterations;\r
-\r
- auto stddev_accumulator = 0.0;\r
- for (auto v : values)\r
- stddev_accumulator = pow((v / BenchmarkParameters::loop) - mean, 2);\r
-\r
- const auto stddev = sqrt(stddev_accumulator / BenchmarkParameters::repetition);\r
-\r
- return {mean, stddev};\r
-}\r
-\r
-std::vector<unsigned> get_sequence(const unsigned number)\r
-{\r
- std::vector<unsigned> sequence(number);\r
- std::iota(sequence.begin(), sequence.end(), 0);\r
-\r
- return sequence;\r
-}\r
-\r
-char* read_data_to_aligned_array(const std::string &filename)\r
-{\r
- std::ifstream input_stream(filename, std::ios::binary);\r
-\r
- std::vector<char> buffer((std::istreambuf_iterator<char>(input_stream)),\r
- std::istreambuf_iterator<char>());\r
-\r
- if(buffer.size() == 0)\r
- throw reading_input_file_exception();\r
-\r
- auto aligned_buffer = aligned_malloc<char>((int) buffer.size(), 64);\r
-\r
- if(aligned_buffer == nullptr)\r
- throw std::runtime_error("Failed to allocate memory for the test vector!");\r
-\r
- std::copy(buffer.begin(), buffer.end(), aligned_buffer);\r
-\r
- return aligned_buffer;\r
-}\r
-\r
-json read_json_from_file(const std::string &filename)\r
-{\r
- json result;\r
-\r
- std::ifstream json_stream(filename);\r
- if(!json_stream.is_open())\r
- throw missing_config_file_exception();\r
-\r
- json_stream >> result;\r
-\r
- return result;\r
-}\r
-\r
-unsigned long tsc_recovery()\r
-{\r
-#ifndef _WIN64\r
- constexpr auto ns_per_sec = 1E9;\r
-\r
- struct timespec sleeptime = {.tv_nsec = __syscall_slong_t(5E8) };\r
-\r
- struct timespec t_start, t_end;\r
-\r
- if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0)\r
- {\r
- unsigned long start = tsc_tick();\r
-\r
- nanosleep(&sleeptime,NULL);\r
- clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);\r
-\r
- unsigned long end = tsc_tick();\r
-\r
- unsigned long ns = (unsigned long)((t_end.tv_sec - t_start.tv_sec) * ns_per_sec + t_end.tv_nsec - t_start.tv_nsec);\r
-\r
- double secs = (double) ns / ns_per_sec;\r
-\r
- unsigned long resolution_timer = (unsigned long)((end - start)/secs);\r
- unsigned long tick_per_usec = (resolution_timer / 1000000);\r
-\r
- std::cout << "[----------] System clock (rdtsc) resolution " << resolution_timer << " [Hz]" << std::endl;\r
- std::cout << "[----------] Ticks per us " << tick_per_usec << std::endl;\r
-\r
- return tick_per_usec;\r
- }\r
-#else\r
-\r
- LARGE_INTEGER tick_per_sec;\r
- QueryPerformanceFrequency(&tick_per_sec);\r
-\r
- std::cout << "[----------] System clock (rdtsc) resolution unknown" << std::endl;\r
- std::cout << "[----------] Ticks per us " << (tick_per_sec.QuadPart / 1000000) << std::endl;\r
- return (unsigned long) tick_per_sec.QuadPart;\r
-\r
-#endif\r
- return 0;\r
-}\r
-\r
-unsigned long tsc_tick()\r
-{\r
-#ifndef _WIN64\r
- unsigned long hi, lo;\r
-\r
- __asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));\r
-\r
- return lo | (hi << 32);\r
-#else\r
- return 0;\r
-#endif\r
-}\r
-\r
-void KernelTests::print_and_store_results(const std::string &isa, const std::string ¶meters,\r
- const std::string &module_name, const std::string &test_name,\r
- const std::string &unit, const int para_factor,\r
- const double mean, const double stddev)\r
-{\r
- std::cout << "[----------] " << "Mean" << " = " << std::fixed << mean << " us" << std::endl;\r
- std::cout << "[----------] " << "Stddev" << " = " << stddev << " us" << std::endl;\r
-\r
-#ifndef _WIN64\r
- /* Two properties below should uniquely identify a test case */\r
- RecordProperty("kernelname", module_name);\r
- RecordProperty("parameters", parameters);\r
-\r
- RecordProperty("isa", isa);\r
- RecordProperty("unit", unit);\r
- RecordProperty("parallelization_factor", para_factor);\r
-\r
- RecordProperty("mean", std::to_string(mean));\r
- RecordProperty("stddev", std::to_string(stddev));\r
-#endif\r
-}\r
+/*******************************************************************************
+ *
+ * Copyright (c) 2020 Intel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ *******************************************************************************/
+
+#include <cmath>
+#include <fstream>
+#include <numeric>
+
+#ifndef _WIN64
+#include <unistd.h>
+#include <sys/syscall.h>
+#else
+#include <Windows.h>
+#endif
+
+#include "common.hpp"
+
+#ifndef CPU_ID
+#define CPU_ID 4
+#endif
+
+/* Required to avoid linker errors */
+json KernelTests::conf;
+std::string KernelTests::test_type;
+unsigned long KernelTests::tsc;
+
+
+long BenchmarkParameters::repetition = 40;
+long BenchmarkParameters::loop = 30;
+unsigned BenchmarkParameters::cpu_id = CPU_ID;
+
+int bind_to_cpu(const unsigned cpu)
+{
+#ifndef _WIN64
+ const auto pid = syscall(SYS_gettid);
+ cpu_set_t mask {};
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ return sched_setaffinity(__pid_t(pid), sizeof(mask), &mask);
+#else
+ return -1;
+#endif
+}
+
+std::pair<double, double> calculate_statistics(const std::vector<long> values)
+{
+ const auto sum = std::accumulate(values.begin(), values.end(), 0L);
+
+ const auto number_of_iterations = BenchmarkParameters::repetition *
+ BenchmarkParameters::loop;
+
+ const auto mean = sum / (double) number_of_iterations;
+
+ auto stddev_accumulator = 0.0;
+ for (auto v : values)
+ stddev_accumulator = pow((v / BenchmarkParameters::loop) - mean, 2);
+
+ const auto stddev = sqrt(stddev_accumulator / BenchmarkParameters::repetition);
+
+ return {mean, stddev};
+}
+
+std::vector<unsigned> get_sequence(const unsigned number)
+{
+ std::vector<unsigned> sequence(number);
+ std::iota(sequence.begin(), sequence.end(), 0);
+
+ return sequence;
+}
+
+char* read_data_to_aligned_array(const std::string &filename)
+{
+ std::ifstream input_stream(filename, std::ios::binary);
+
+ std::vector<char> buffer((std::istreambuf_iterator<char>(input_stream)),
+ std::istreambuf_iterator<char>());
+
+ if(buffer.size() == 0)
+ throw reading_input_file_exception();
+
+ auto aligned_buffer = aligned_malloc<char>((int) buffer.size(), 64);
+
+ if(aligned_buffer == nullptr)
+ throw std::runtime_error("Failed to allocate memory for the test vector!");
+
+ std::copy(buffer.begin(), buffer.end(), aligned_buffer);
+
+ return aligned_buffer;
+}
+
+json read_json_from_file(const std::string &filename)
+{
+ json result;
+
+ std::ifstream json_stream(filename);
+ if(!json_stream.is_open())
+ throw missing_config_file_exception();
+
+ json_stream >> result;
+
+ return result;
+}
+
+unsigned long tsc_recovery()
+{
+#ifndef _WIN64
+ constexpr auto ns_per_sec = 1E9;
+
+ struct timespec sleeptime = {.tv_nsec = __syscall_slong_t(5E8) };
+
+ struct timespec t_start, t_end;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0)
+ {
+ unsigned long start = tsc_tick();
+
+ nanosleep(&sleeptime,NULL);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &t_end);
+
+ unsigned long end = tsc_tick();
+
+ unsigned long ns = (unsigned long)((t_end.tv_sec - t_start.tv_sec) * ns_per_sec + t_end.tv_nsec - t_start.tv_nsec);
+
+ double secs = (double) ns / ns_per_sec;
+
+ unsigned long resolution_timer = (unsigned long)((end - start)/secs);
+ unsigned long tick_per_usec = (resolution_timer / 1000000);
+
+ std::cout << "[----------] System clock (rdtsc) resolution " << resolution_timer << " [Hz]" << std::endl;
+ std::cout << "[----------] Ticks per us " << tick_per_usec << std::endl;
+
+ return tick_per_usec;
+ }
+#else
+
+ LARGE_INTEGER tick_per_sec;
+ QueryPerformanceFrequency(&tick_per_sec);
+
+ std::cout << "[----------] System clock (rdtsc) resolution unknown" << std::endl;
+ std::cout << "[----------] Ticks per us " << (tick_per_sec.QuadPart / 1000000) << std::endl;
+ return (unsigned long) tick_per_sec.QuadPart;
+
+#endif
+ return 0;
+}
+
+unsigned long tsc_tick()
+{
+#ifndef _WIN64
+ unsigned long hi, lo;
+
+ __asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
+
+ return lo | (hi << 32);
+#else
+ return 0;
+#endif
+}
+
+void KernelTests::print_and_store_results(const std::string &isa, const std::string ¶meters,
+ const std::string &module_name, const std::string &test_name,
+ const std::string &unit, const int para_factor,
+ const double mean, const double stddev)
+{
+ std::cout << "[----------] " << "Mean" << " = " << std::fixed << mean << " us" << std::endl;
+ std::cout << "[----------] " << "Stddev" << " = " << stddev << " us" << std::endl;
+
+#ifndef _WIN64
+ /* Two properties below should uniquely identify a test case */
+ RecordProperty("kernelname", module_name);
+ RecordProperty("parameters", parameters);
+
+ RecordProperty("isa", isa);
+ RecordProperty("unit", unit);
+ RecordProperty("parallelization_factor", para_factor);
+
+ RecordProperty("mean", std::to_string(mean));
+ RecordProperty("stddev", std::to_string(stddev));
+#endif
+}