#include <benchmark/benchmark.h>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>

using std::generate;
using std::vector;

// Benchmark for showing the impact of cache associativity.
//
// Walks a 32 MiB byte array with a power-of-two stride and increments one
// byte per access, so that different strides can be compared against each
// other.
//
// s.range(0) is the stride exponent: the stride is 2^range(0) bytes. The
// exponent must be non-negative and small enough that the stride fits in an
// int; otherwise the run is reported as an error via SkipWithError and
// nothing is measured.
//
// Counters reported: "param" (the exponent) and "step" (the stride in bytes).
static void assocBench(benchmark::State &s) {
  // The step is built with an int shift. Shifting by a negative count, or by
  // at least the number of value bits in int, does not yield a valid
  // positive step (and is undefined behaviour before C++20), which would
  // turn the index below into garbage. Reject such arguments up front.
  const auto exponent = s.range(0);
  if (exponent < 0 || exponent >= std::numeric_limits<int>::digits) {
    s.SkipWithError("assocBench: step exponent out of range for an int step");
    return;
  }

  // Use a variable step size (power of 2)
  const int step = 1 << static_cast<int>(exponent);

  // Size of the array being walked (2^25 one-byte elements)
  const int N = 1 << 25;
  std::vector<char> v(N);

  // Number of accesses per benchmark iteration
  const int MAX_ITER = 1 << 12;

  // Profile the runtime of different step sizes
  while (s.KeepRunning()) {
    // Perform 4k accesses, always starting from the front of the array
    int i = 0;
    for (int iter = 0; iter < MAX_ITER; iter++) {
      // Just increment this byte
      v[i]++;

      // Advance by one step; reset if we go off the end of the array.
      // i < N and step <= 2^30 here, so the addition cannot overflow.
      i += step;
      if (i >= N) {
        i = 0;
      }
    }
  }

  // Expose the benchmark argument and the derived step in the report
  s.counters["param"] = s.range(0);
  s.counters["step"] = step;
}
// Register the benchmark for step exponents 1..30 (step = 2^exponent bytes).
// assocBench computes the step as `1 << exponent` on an int, so (with a
// 32-bit int) exponents of 31 and above cannot be represented and must not
// be passed. Note that steps of 2^25 and larger are at least as big as the
// array itself, so those runs only ever touch v[0].
BENCHMARK(assocBench)->DenseRange(1, 30);

// Benchmark entry point: BENCHMARK_MAIN() is the Google Benchmark macro that
// supplies main() for this program and runs the registered benchmarks.
BENCHMARK_MAIN();
