Written by:
Last updated: 21 July 2022
The reason why multithreading is important today can be explained by the following diagram:
Computers have gotten faster and more complex, but single threaded performance is not improving anywhere near as quickly as multi threaded performance.
In this lecture we’ll cover the broad strokes for the language features that C++ provides, but not how to use multithreading correctly for performance (CS3210) or the theory required to use the lower level language features (CS3211). You should take CS3210 and CS3211 instead! (Thanks Prof. Cristina!)
Reference: OpenMP: tutorials and articles
To use OpenMP,
-fopenmp, similar to how we compiled
with -fsanitize=thread for ThreadSanitizer.
#pragma omp ...
to appropriate places in your code.
There is a lot that can be done with OpenMP. Here, we’ll just demonstrate two very basic examples and leave more complicated use cases to CS3210 :)
#include <omp.h>
#include <cstdio>
// Spawns an OpenMP thread team; every thread in the team runs the
// parallel block once and prints its own ID and the team size.
int main() {
  // Note that blocks { ... } immediately
  // follow parallel directives!
  // In this case, think of `parallel`
  // similar to `for (...)`
  #pragma omp parallel
  {
    // This thread's index within the team (0..TOTAL-1).
    int ID = omp_get_thread_num();
    // Number of threads in the team.
    int TOTAL = omp_get_num_threads();
    // printf rather than std::cout — presumably so each line is
    // emitted in one call and output doesn't interleave; confirm.
    printf("Hello from thread %d/%d\n", ID, TOTAL);
  }
}
$ make -B hello-openmp.clang++.out
clang++ -g -O3 -std=c++20 -fopenmp -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable hello-openmp.cpp -o hello-openmp.clang++.out
$ ./hello-openmp.clang++.out
Hello from thread 6/8
Hello from thread 1/8
Hello from thread 4/8
Hello from thread 7/8
Hello from thread 2/8
Hello from thread 0/8
Hello from thread 5/8
Hello from thread 3/8
parallel construct
Here will be our starting point: Godbolt link
#include <algorithm>
#include <array>
#include <execution>
#include <iostream>
#include <random>
#include <ranges>
#include <vector>
// A cell is either dead (0) or alive (1).
using CellT = bool;
// Count of live neighbours, always in 0..8.
using NeighboursT = int;
// C++23 feature, we'll just make a UDL for it
size_t operator""_uz(unsigned long long int x);
// Calculates the number of live neighbours (out of 8)
//
// Neighbours of x:
// 1 2 3
// 4 x 5
// 6 7 8
NeighboursT neighbours(auto* board, size_t i, size_t j);
// One board row: 1024 cells plus a 1-cell dead border on each side.
// The alignas(128) member pads each Row — presumably so rows
// written by different threads don't share a cache line (false
// sharing); confirm against the target's cache-line size.
struct Row : std::array<CellT, 1026> {
  alignas(128) struct { } padding; };
// Double-buffered boards: read from one, write the other, swap
// each generation. 1024x1024 playing field + border.
std::array<Row, 1026> board1;
std::array<Row, 1026> board2;
// Runs 2048 generations of Conway's Game of Life on a 1024x1024
// board seeded with a glider, then prints the 3x3 patch at the
// centre of the board.
int main() {
  // Double buffering: read curr_board, write next_board.
  auto* curr_board = &board1;
  auto* next_board = &board2;
  std::array<std::array<CellT, 3>, 3> glider{{
      {{0, 1, 0}},
      {{0, 0, 1}},
      {{1, 1, 1}},
  }};
  // Copy the glider into the top-left of the playing field; the
  // +1 offsets skip the dead border row/column.
  for (size_t i = 0; i < 3; ++i) {
    for (size_t j = 0; j < 3; ++j) {
      (*curr_board)[i + 1][j + 1] = glider[i][j];
    }
  }
  for (size_t iteration = 0; iteration < 2048; ++iteration) {
    // Live cells are indexed 1..1024; 0 and 1025 are the border.
    for (size_t i = 1; i < 1024_uz + 1; ++i) {
      for (size_t j = 1; j < 1024_uz + 1; ++j) {
        NeighboursT num_neighbours = neighbours(curr_board, i, j);
        if (num_neighbours == 2) {
          // Exactly 2 neighbours: the cell keeps its state.
          (*next_board)[i][j] = (*curr_board)[i][j];
        } else {
          // 3 neighbours: alive; anything else: dead.
          (*next_board)[i][j] = num_neighbours == 3;
        }
      }
    }
    std::swap(curr_board, next_board);
  }
  // Dump the 3x3 patch at the board centre (border offset +1).
  for (size_t i = 512; i < 512 + 3; ++i) {
    for (size_t j = 512; j < 512 + 3; ++j) {
      std::cout << (*curr_board)[i + 1][j + 1] << ' ';
    }
    std::cout << '\n';
  }
  return 0;
}
// Stand-in for the C++23 `uz` literal suffix: converts an
// unsigned long long literal into a size_t.
size_t operator""_uz(unsigned long long int x) {
  return static_cast<size_t>(x);
}
// Counts how many of the 8 cells surrounding (i, j) are alive.
// Starts from minus the centre cell, then adds the whole 3x3
// neighbourhood, so only the 8 true neighbours remain counted.
// Callers pass i, j >= 1, so i - 1 / j - 1 never wrap.
NeighboursT neighbours(auto* board, size_t i, size_t j) {
  NeighboursT live = -static_cast<NeighboursT>((*board)[i][j]);
  for (size_t di = 0; di < 3; ++di) {
    for (size_t dj = 0; dj < 3; ++dj) {
      live += (*board)[i - 1 + di][j - 1 + dj];
    }
  }
  return live;
}
The main loop we will be focusing on is the following:
// The sequential reference version: 2048 generations, each cell
// computed from the previous generation's board, then the two
// boards are swapped (double buffering).
for (size_t iteration = 0; iteration < 2048; ++iteration) {
  for (size_t i = 1; i < 1024_uz + 1; ++i) {
    for (size_t j = 1; j < 1024_uz + 1; ++j) {
      NeighboursT num_neighbours = neighbours(curr_board, i, j);
      if (num_neighbours == 2) {
        // Exactly 2 live neighbours: state is unchanged.
        (*next_board)[i][j] = (*curr_board)[i][j];
      } else {
        // 3 live neighbours: alive; otherwise: dead.
        (*next_board)[i][j] = num_neighbours == 3;
      }
    }
  }
  std::swap(curr_board, next_board);
}
In this situation, we have what’s known as an embarrassingly parallel problem. While each iteration has to be done sequentially, notice that the update function on each individual cell can be carried out in any order and in parallel, and no communication needs to happen between the cells.
The easiest way to make this parallel is to use OpenMP, which helps us spawn multiple threads with 0 effort.
// Manual work-splitting: every OpenMP thread computes a contiguous
// band of rows chosen from its own thread ID.
for (size_t iteration = 0; iteration < 2048; ++iteration) {
  #pragma omp parallel
  {
    size_t ID = (size_t) omp_get_thread_num();
    size_t TOTAL = (size_t) omp_get_num_threads();
    // Ceiling division: all 1024 rows are covered even when
    // TOTAL does not divide 1024 evenly.
    size_t rows_per_thread = (1024_uz + TOTAL - 1) / TOTAL;
    // Using the thread ID,
    // only compute on the rows we're responsible for
    for (size_t i = ID * rows_per_thread + 1;
         // min() clamps the last band to the board edge.
         i < std::min(1024_uz, (ID + 1) * rows_per_thread) + 1;
         ++i) {
      for (size_t j = 1; j < 1024_uz + 1; ++j) {
        NeighboursT num_neighbours = neighbours(curr_board, i, j);
        if (num_neighbours == 2) {
          (*next_board)[i][j] = (*curr_board)[i][j];
        } else {
          (*next_board)[i][j] = num_neighbours == 3;
        }
      }
    }
  }
  // After the parallel region (and its implicit barrier), every
  // thread has finished writing, so the swap is safe.
  std::swap(curr_board, next_board);
}
parallel construct
But with OpenMP, there’s actually an even easier way, which is to let OpenMP do the splitting of the work for us.
Using a parallel for construct, we can tell OpenMP to run the
loop but split up each iteration to be run by a different thread.
// `parallel for`: OpenMP splits the iterations of the row loop
// across the thread team automatically — no manual ID math.
for (size_t iteration = 0; iteration < 2048; ++iteration) {
  #pragma omp parallel for
  for (size_t i = 1; i < 1024_uz + 1; ++i) {
    for (size_t j = 1; j < 1024_uz + 1; ++j) {
      NeighboursT num_neighbours = neighbours(curr_board, i, j);
      if (num_neighbours == 2) {
        (*next_board)[i][j] = (*curr_board)[i][j];
      } else {
        (*next_board)[i][j] = num_neighbours == 3;
      }
    }
  }
  std::swap(curr_board, next_board);
}
parallel for construct
By default, it will pick a sensible way to split up the work. We can intervene and add additional arguments to tell it how to schedule the tasks, like so:
// Same as `parallel for`, but with an explicit schedule:
// schedule(static, 1) hands rows out round-robin, one at a time,
// instead of letting OpenMP pick the chunking.
for (size_t iteration = 0; iteration < 2048; ++iteration) {
  #pragma omp parallel for schedule(static, 1)
  for (size_t i = 1; i < 1024_uz + 1; ++i) {
    for (size_t j = 1; j < 1024_uz + 1; ++j) {
      NeighboursT num_neighbours = neighbours(curr_board, i, j);
      if (num_neighbours == 2) {
        (*next_board)[i][j] = (*curr_board)[i][j];
      } else {
        (*next_board)[i][j] = num_neighbours == 3;
      }
    }
  }
  std::swap(curr_board, next_board);
}
Let’s benchmark!
$ hyperfine ./*.out --warmup 2
Benchmark 1: ./demo-loop.clang++.out
Time (mean ± σ): 719.9 ms ± 9.1 ms [User: 716.0 ms, System: 1.9 ms]
Range (min … max): 711.5 ms … 743.3 ms 10 runs
Benchmark 2: ./demo-loop.g++.out
Time (mean ± σ): 3.061 s ± 0.017 s [User: 3.054 s, System: 0.003 s]
Range (min … max): 3.040 s … 3.090 s 10 runs
Benchmark 3: ./demo-openmp-parallel-for.clang++.out
Time (mean ± σ): 236.3 ms ± 34.3 ms [User: 1777.2 ms, System: 9.0 ms]
Range (min … max): 207.0 ms … 316.3 ms 11 runs
Benchmark 4: ./demo-openmp-parallel-for.g++.out
Time (mean ± σ): 2.151 s ± 0.135 s [User: 16.176 s, System: 0.033 s]
Range (min … max): 1.833 s … 2.349 s 10 runs
Benchmark 5: ./demo-openmp-parallel.clang++.out
Time (mean ± σ): 304.5 ms ± 44.2 ms [User: 2276.1 ms, System: 11.8 ms]
Range (min … max): 270.0 ms … 415.4 ms 10 runs
Benchmark 6: ./demo-openmp-parallel.g++.out
Time (mean ± σ): 1.158 s ± 0.019 s [User: 8.830 s, System: 0.009 s]
Range (min … max): 1.135 s … 1.195 s 10 runs
Benchmark 7: ./demo-openmp-schedule.clang++.out
Time (mean ± σ): 337.8 ms ± 23.6 ms [User: 2513.8 ms, System: 18.1 ms]
Range (min … max): 298.5 ms … 368.5 ms 10 runs
Benchmark 8: ./demo-openmp-schedule.g++.out
Time (mean ± σ): 1.210 s ± 0.036 s [User: 9.185 s, System: 0.011 s]
Range (min … max): 1.172 s … 1.287 s 10 runs
Summary
'./demo-openmp-parallel-for.clang++.out' ran
1.29 ± 0.26 times faster than './demo-openmp-parallel.clang++.out'
1.43 ± 0.23 times faster than './demo-openmp-schedule.clang++.out'
3.05 ± 0.44 times faster than './demo-loop.clang++.out'
4.90 ± 0.72 times faster than './demo-openmp-parallel.g++.out'
5.12 ± 0.76 times faster than './demo-openmp-schedule.g++.out'
9.10 ± 1.44 times faster than './demo-openmp-parallel-for.g++.out'
12.95 ± 1.88 times faster than './demo-loop.g++.out'
Firstly, the good news is that the parallel versions run faster than not parallelizing at all.
Secondly, we can see that clang++ does a much better job at this benchmark
than g++, but we also see that OpenMP’s
parallel for construct on clang++ was superior to the other
methods we’ve tried, such as using parallel construct and
then manually splitting up the tasks, or using an inferior scheduling
strategy.
<algorithm> Execution policies
Reference: cppreference Execution policies
Purists may not agree with using OpenMP as it is not a core C++ feature,
but rather just a widely supported compiler extension. That’s why the
syntax to use OpenMP requires using spooky
#pragma directives.
C++ does in fact also support built-in parallelism (and also other things such as vectorisation). This is still not guaranteed by the language, but is a language feature that allows standard libraries to implement parallel algorithms or compilers to enable certain optimizations.
This is done through tag parameters called execution policies that you can add to most STL algorithms that tells the standard library / compiler to allow parallelism / vectorisation. There are 4 execution policies:
sequenced_policy: (Default) Everything should run
sequentially, if there are callbacks, then the body of each callback is
run in sequence, with no parallel calls or unsequenced calls.
parallel_policy: Parallel algorithms may be used, and if
there are callbacks, these may be run in parallel.
unsequenced_policy: Vectorisation can be used, and if there
are callbacks, these may be run unsequenced. Basically similar
requirements to parallel policy, but only one thread is used.
parallel_unsequenced_policy: Both of the above,
parallelisation and vectorisation can both be used.
Just like how std::piecewise_construct_t has a
std::piecewise_construct object that you can pass in, the
objects we use here are
std::execution::{seq,par,unseq,par_unseq} respectively.
The classic example to demonstrate this is with the algorithm
std::sort.
#include <algorithm>
#include <iostream>
#include <random>
#include <vector>
// C++23 feature, we'll just make a UDL for it
size_t operator""_uz(unsigned long long int x);
// Fills a vector with 2^24 fixed-seed pseudo-random numbers and
// sorts it, printing the smallest element so the work can't be
// optimised away.
int main() {
  // We'll make a vector containing a bunch of random numbers
  // Use a fixed seed so it's consistent across benchmarks
  std::mt19937_64 rng{420};
  // Uniform over [0, 2^63] inclusive.
  std::uniform_int_distribution dist{0_uz, 1_uz << 63};
  std::vector<size_t> nums;
  const size_t NUM_NUMS = 1 << 24;
  // Reserve up front: one allocation instead of repeated growth.
  nums.reserve(NUM_NUMS);
  for (size_t i = 0; i < NUM_NUMS; ++i) {
    nums.push_back(dist(rng));
  }
  // Then sort it
  std::sort(nums.begin(), nums.end());
  std::cout << "smallest number is: " << nums[0] << std::endl;
}
// Stand-in for the C++23 `uz` literal suffix: converts an
// unsigned long long literal into a size_t.
size_t operator""_uz(unsigned long long int x) {
  return static_cast<size_t>(x);
}
We can add the execution policy tag as the first parameter to
std::sort:
std::sort(std::execution::par_unseq, nums.begin(), nums.end());
To compile this, you might need to link a library like TBB or provide a
flag like -fopenmp. Here, we added -ltbb to our
linker flags to link with TBB.
Let’s benchmark!
$ hyperfine ./*.out --warmup 5
Benchmark 1: ./sorting-par.clang++.out
Time (mean ± σ): 583.3 ms ± 26.3 ms [User: 2378.1 ms, System: 59.8 ms]
Range (min … max): 555.9 ms … 638.9 ms 10 runs
Benchmark 2: ./sorting-par.g++.out
Time (mean ± σ): 650.6 ms ± 19.0 ms [User: 2302.9 ms, System: 54.4 ms]
Range (min … max): 620.8 ms … 677.2 ms 10 runs
Benchmark 3: ./sorting-parunseq.clang++.out
Time (mean ± σ): 620.6 ms ± 14.8 ms [User: 2630.9 ms, System: 56.0 ms]
Range (min … max): 599.8 ms … 646.6 ms 10 runs
Benchmark 4: ./sorting-parunseq.g++.out
Time (mean ± σ): 664.5 ms ± 13.6 ms [User: 2337.7 ms, System: 52.6 ms]
Range (min … max): 644.8 ms … 680.3 ms 10 runs
Benchmark 5: ./sorting-unseq.clang++.out
Time (mean ± σ): 1.365 s ± 0.014 s [User: 1.348 s, System: 0.011 s]
Range (min … max): 1.352 s … 1.401 s 10 runs
Benchmark 6: ./sorting-unseq.g++.out
Time (mean ± σ): 1.488 s ± 0.039 s [User: 1.466 s, System: 0.015 s]
Range (min … max): 1.431 s … 1.542 s 10 runs
Benchmark 7: ./sorting.clang++.out
Time (mean ± σ): 1.353 s ± 0.008 s [User: 1.334 s, System: 0.013 s]
Range (min … max): 1.338 s … 1.365 s 10 runs
Benchmark 8: ./sorting.g++.out
Time (mean ± σ): 1.423 s ± 0.008 s [User: 1.402 s, System: 0.018 s]
Range (min … max): 1.411 s … 1.438 s 10 runs
Summary
'./sorting-par.clang++.out' ran
1.06 ± 0.05 times faster than './sorting-parunseq.clang++.out'
1.12 ± 0.06 times faster than './sorting-par.g++.out'
1.14 ± 0.06 times faster than './sorting-parunseq.g++.out'
2.32 ± 0.11 times faster than './sorting.clang++.out'
2.34 ± 0.11 times faster than './sorting-unseq.clang++.out'
2.44 ± 0.11 times faster than './sorting.g++.out'
2.55 ± 0.13 times faster than './sorting-unseq.g++.out'
std::sort
Seems like this time, par execution policy was the fastest,
closely tied to par_unseq. The single-threaded versions of
std::sort both performed roughly 2 times slower than the
parallel ones, with clang++ being a little faster than g++ across all
benchmarks.
Using the std::for_each algorithm, we can effectively convert
for loops to an algorithm call, which means we can now use execution
policies with it!
In order to do this however, we need to give a pair of iterators to
std::for_each. In C++20, we should be able to do this with
std::ranges::iota_view, but standard library support is not
complete for std::ranges, so instead I implemented my own
makeshift version called ValIter, which simply wraps a
size_t in a way that looks like an iterator, and returns the
size_t when dereferenced.
for_each-ified game of life code snippet
#include <algorithm>
#include <array>
#include <concepts>
#include <execution>
#include <iostream>
#include <random>
#include <ranges>
#include <vector>
// A cell is either dead (0) or alive (1).
using CellT = bool;
// Count of live neighbours, always in 0..8.
using NeighboursT = int;
// See std::ranges::iota_view
// This is an iterator that returns T
// (dereferencing yields the wrapped integer; each increment
// advances the value by Stride).
template <typename T = size_t, T Stride = 1>
requires std::integral<T>
struct ValIter;
// C++23 feature, we'll just make a UDL for it
size_t operator""_uz(unsigned long long int x);
// Calculates the number of live neighbours (out of 8)
//
// Neighbours of x:
// 1 2 3
// 4 x 5
// 6 7 8
NeighboursT neighbours(auto* board, size_t i, size_t j);
// One board row padded via alignas(128) — presumably so rows
// written by different threads don't share a cache line; confirm.
struct Row : std::array<CellT, 1026> {
  alignas(128) struct { } padding; };
// Double-buffered 1024x1024 boards with a 1-cell dead border.
std::array<Row, 1026> board1;
std::array<Row, 1026> board2;
// The Game of Life main, with both loops expressed as
// std::for_each over ValIter so an execution policy could be
// added. Templated only so ValIter becomes a dependent name.
template <typename st = size_t>
int my_main() {
  // Magic: Make ValIter a dependent name so it can compile
  // even though ValIter isn't defined yet.
  using ValIter = ValIter<st>;
  // __restrict: promise the two board pointers never alias,
  // which lets the optimiser vectorise more aggressively.
  auto* __restrict curr_board = &board1;
  auto* __restrict next_board = &board2;
  std::array<std::array<CellT, 3>, 3> glider{{
      {{0, 1, 0}},
      {{0, 0, 1}},
      {{1, 1, 1}},
  }};
  // Seed the glider at the top-left; +1 skips the dead border.
  for (size_t i = 0; i < 3; ++i) {
    for (size_t j = 0; j < 3; ++j) {
      (*curr_board)[i + 1][j + 1] = glider[i][j];
    }
  }
  // Compile-time check that ValIter satisfies the concept that
  // the parallel algorithms require.
  static_assert(std::random_access_iterator<ValIter>);
  for (size_t iteration = 0; iteration < 2048; ++iteration) {
    // Note that we capture by value here because we only
    // capture pointers (curr_board, next_board) or indices (i).
    // It would be silly to capture a reference to a pointer,
    // as that would effectively be a double pointer,
    // and in fact, we get much worse performance on clang++
    // when capturing by reference.
    //
    // Here, capturing by value is really cheap,
    // so there's no downside to doing so.
    std::for_each( //
        ValIter{1_uz},
        ValIter{1024_uz + 1},
        [=](size_t i) {
          std::for_each( //
              ValIter{1_uz},
              ValIter{1024_uz + 1},
              [=](size_t j) {
                NeighboursT num_neighbours = neighbours(curr_board, i, j);
                if (num_neighbours == 2) {
                  // 2 neighbours: unchanged; 3: alive; else: dead.
                  (*next_board)[i][j] = (*curr_board)[i][j];
                } else {
                  (*next_board)[i][j] = num_neighbours == 3;
                }
              });
        });
    std::swap(curr_board, next_board);
  }
  // Dump the 3x3 patch at the board centre.
  for (size_t i = 512; i < 512 + 3; ++i) {
    for (size_t j = 512; j < 512 + 3; ++j) {
      std::cout << (*curr_board)[i + 1][j + 1] << ' ';
    }
    std::cout << '\n';
  }
  return 0;
}
// Random-access iterator over the integers themselves: *it yields
// the wrapped value, and ++it advances it by Stride. A stand-in
// for std::ranges::iota_view's iterator.
template <typename T, T Stride>
requires std::integral<T>
struct ValIter {
  // Signed counterpart of T; serves as the difference type.
  using ST = std::make_signed_t<T>;
  T value;
  using iterator_category = std::random_access_iterator_tag;
  using value_type = T;
  // Yields the value itself (a prvalue, not a reference).
  T operator*() const {
    return value;
  }
  // Fixed: scale the offset by Stride so that it[n] == *(it + n),
  // as the random-access iterator contract requires. (Previously
  // the offset was unscaled, which was only correct for the
  // Stride == 1 instantiations.)
  T operator[](ST inc) const {
    return value + static_cast<T>(inc) * Stride;
  }
  ValIter& operator++() {
    value += Stride;
    return *this;
  }
  // Post-increment: return the pre-step snapshot.
  ValIter operator++(int) {
    ValIter it{value};
    value += Stride;
    return it;
  }
  ValIter& operator--() {
    value -= Stride;
    return *this;
  }
  ValIter operator--(int) {
    ValIter it{value};
    value -= Stride;
    return it;
  }
  ValIter& operator+=(ST i) {
    value += i * Stride;
    return *this;
  }
  ValIter& operator-=(ST i) {
    value -= i * Stride;
    return *this;
  }
  // Distance between two iterators, measured in strides.
  ST operator-(ValIter other) const {
    return static_cast<ST>((value - other.value) / Stride);
  }
  friend ValIter operator-(ValIter i, ST dec) {
    return ValIter{i.value - static_cast<T>(dec) * Stride};
  }
  friend ValIter operator+(ValIter i, ST inc) {
    return ValIter{i.value + static_cast<T>(inc) * Stride};
  }
  friend ValIter operator+(ST inc, ValIter i) {
    return ValIter{i.value + static_cast<T>(inc) * Stride};
  }
  // Ordering/equality compare the wrapped values directly.
  friend std::strong_ordering operator<=>(ValIter lhs,
                                          ValIter rhs) = default;
  friend bool operator==(ValIter lhs, ValIter rhs) = default;
};
// Stand-in for the C++23 `uz` literal suffix: converts an
// unsigned long long literal into a size_t.
size_t operator""_uz(unsigned long long int x) {
  return static_cast<size_t>(x);
}
// Calculates the number of live neighbours (out of 8): sums the
// whole 3x3 block around (i, j), then subtracts the centre cell.
// Callers pass i, j >= 1, so i - 1 / j - 1 never wrap.
NeighboursT neighbours(auto* board, size_t i, size_t j) {
  NeighboursT num_neighbours = 0;
  for (size_t ni = i - 1; ni <= i + 1; ++ni) {
    for (size_t nj = j - 1; nj <= j + 1; ++nj) {
      num_neighbours += (*board)[ni][nj];
    }
  }
  // Remove the centre cell: it isn't its own neighbour.
  num_neighbours -= (*board)[i][j];
  return num_neighbours;
}
// Entry point: defers to the templated my_main so that ValIter
// could be named (as a dependent name) before its definition.
int main() {
  return my_main();
}
// The same main loop in isolation: nested std::for_each over
// ValIter ranges, ready for an execution-policy argument.
for (size_t iteration = 0; iteration < 2048; ++iteration) {
  // Note that we capture by value here because we only
  // capture pointers (curr_board, next_board) or indices (i).
  // It would be silly to capture a reference to a pointer,
  // as that would effectively be a double pointer,
  // and in fact, we get much worse performance on clang++
  // when capturing by reference.
  //
  // Here, capturing by value is really cheap,
  // so there's no downside to doing so.
  std::for_each( //
      ValIter{1_uz},
      ValIter{1024_uz + 1},
      [=](size_t i) {
        std::for_each( //
            ValIter{1_uz},
            ValIter{1024_uz + 1},
            [=](size_t j) {
              NeighboursT num_neighbours = neighbours(curr_board, i, j);
              if (num_neighbours == 2) {
                (*next_board)[i][j] = (*curr_board)[i][j];
              } else {
                (*next_board)[i][j] = num_neighbours == 3;
              }
            });
      });
  std::swap(curr_board, next_board);
}
for_each form
Let’s benchmark!
$ hyperfine ./*.clang++.out --warmup 5
Benchmark 1: ./demo-foreach-par.clang++.out
Time (mean ± σ): 224.5 ms ± 11.6 ms [User: 1493.9 ms, System: 44.3 ms]
Range (min … max): 211.0 ms … 243.4 ms 13 runs
Benchmark 2: ./demo-foreach-parunseq.clang++.out
Time (mean ± σ): 223.9 ms ± 8.5 ms [User: 1520.4 ms, System: 31.6 ms]
Range (min … max): 216.8 ms … 246.2 ms 13 runs
Benchmark 3: ./demo-foreach-unseq.clang++.out
Time (mean ± σ): 712.9 ms ± 5.7 ms [User: 710.4 ms, System: 1.5 ms]
Range (min … max): 706.3 ms … 725.3 ms 10 runs
Benchmark 4: ./demo-foreach.clang++.out
Time (mean ± σ): 709.8 ms ± 5.2 ms [User: 707.4 ms, System: 1.5 ms]
Range (min … max): 705.7 ms … 723.4 ms 10 runs
Benchmark 5: ./demo-loop.clang++.out
Time (mean ± σ): 707.9 ms ± 5.9 ms [User: 704.6 ms, System: 2.6 ms]
Range (min … max): 702.6 ms … 723.4 ms 10 runs
Summary
'./demo-foreach-parunseq.clang++.out' ran
1.00 ± 0.06 times faster than './demo-foreach-par.clang++.out'
3.16 ± 0.12 times faster than './demo-loop.clang++.out'
3.17 ± 0.12 times faster than './demo-foreach.clang++.out'
3.18 ± 0.12 times faster than './demo-foreach-unseq.clang++.out'
I omitted the g++ versions here because they perform much worse. Here, we can see that the single threaded versions perform roughly 3 times slower than the parallel versions.
<thread> for multi-threading
The most manual way of achieving parallelism that we will show today is using threads directly.
To use the <thread> library, we need to link with
pthreads, the backend used for C++ threads on Linux. We do that by
compiling and linking with -pthread.
Note that simply linking with -lpthread is not sufficient. We
need to both link with the pthreads library, but also tell the compiler to
properly interface with the library, and this is done with
-pthread. This is important due to the C++ memory model,
which we briefly mention later and properly explain in CS3211.
#include <cstdio>
#include <thread>
#include <vector>
// Launches 8 threads, each printing its index and the thread
// count, then joins them all before returning.
int main() {
  const size_t TOTAL = 8;
  std::vector<std::thread> threads;
  threads.reserve(TOTAL);
  // Spawn one thread per ID; the lambda captures by value since a
  // reference to the loop variable would dangle.
  size_t ID = 0;
  while (ID < TOTAL) {
    threads.emplace_back([ID, TOTAL]() { //
      printf("Hello from thread %zu/%zu\n", ID, TOTAL);
    });
    ++ID;
  }
  // Every std::thread must be join()ed (or detach()ed) before it
  // is destroyed, otherwise the program crashes — so wait for all
  // of them here.
  for (auto& thr : threads) {
    thr.join();
  }
}
$ make -B hello-thread.clang++.out
clang++ -g -O3 -std=c++20 -pthread -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable hello-thread.cpp -o hello-thread.clang++.out
$ ./hello-thread.clang++.out
Hello from thread 0/8
Hello from thread 2/8
Hello from thread 3/8
Hello from thread 4/8
Hello from thread 5/8
Hello from thread 6/8
Hello from thread 7/8
Hello from thread 1/8
We can use threads to parallelise game of life. However, starting and stopping threads every iteration is very slow, due to the relatively large cost of creating threads.
What we can use instead is a thread pool. We create a number of threads (say, 8 threads) at the start of the program, and communicate with these 8 threads to get them to do the calculations for us.
We will get each thread to wait until jobs show up in a job queue, and when one shows up, take it and execute the job. Then all our main loop needs to do is divide the work of each iteration into parts, submit these jobs to the job queue, and wait until they’re done.
Here is a list of references if you would like to understand how to write your own task pool:
std::promise,
std::future: Similar to the equivalent classes in Java.
std::packaged_task: Basically std::function but combined with a
std::promise.
std::mutex,
std::condition_variable: Required in order to synchronise the job queue.
#include <algorithm>
#include <array>
#include <condition_variable>
#include <execution>
#include <future>
#include <iostream>
#include <optional>
#include <queue>
#include <random>
#include <ranges>
#include <thread>
#include <vector>
// A cell is either dead (0) or alive (1).
using CellT = bool;
// Count of live neighbours, always in 0..8.
using NeighboursT = int;
// C++23 feature, we'll just make a UDL for it
size_t operator""_uz(unsigned long long int x);
// Calculates the number of live neighbours (out of 8)
//
// Neighbours of x:
// 1 2 3
// 4 x 5
// 6 7 8
NeighboursT neighbours(auto* board, size_t i, size_t j);
// One board row padded via alignas(128) — presumably so rows
// written by different threads don't share a cache line; confirm.
struct Row : std::array<CellT, 1026> {
  alignas(128) struct { } padding; };
// Double-buffered 1024x1024 boards with a 1-cell dead border.
std::array<Row, 1026> board1;
std::array<Row, 1026> board2;
// A multi-producer multi-consumer blocking queue ("channel").
// Consumers block in pop() until an element arrives or the
// channel is closed; producers push()/push_range(), then close().
template <typename T>
struct Channel {
  std::mutex mut{};
  std::condition_variable cond{};
  std::queue<T> elems{};
  // Once set, pop() returns nullopt when the queue is drained,
  // and any further push/close throws.
  bool closed = false;
  // Blocks until an element is available or the channel is
  // closed; returns nullopt only when closed and drained.
  std::optional<T> pop() {
    std::unique_lock lock{mut};
    cond.wait(lock, [this]() { return !elems.empty() || closed; });
    if (!elems.empty()) {
      // We exited the wait due to having an element to pop
      T elem = std::move(elems.front());
      elems.pop();
      if (!elems.empty()) {
        // NOTE(review): early unlock only when more elements
        // remain — presumably so the return below runs outside
        // the critical section; otherwise ~unique_lock releases
        // the mutex. Confirm this asymmetry is intentional.
        lock.unlock();
      }
      return elem;
    }
    // We exited the wait due to closed being true
    return std::nullopt;
  }
  // Enqueues one element and wakes all waiters.
  // Throws std::runtime_error if the channel is already closed.
  void push(T elem) {
    {
      std::scoped_lock lock{mut};
      if (closed) {
        throw std::runtime_error("Push into closed channel is a panic");
      }
      elems.push(std::move(elem));
    }
    // Notify after releasing the lock so woken threads can
    // acquire it immediately.
    cond.notify_all();
  }
  // Moves [begin, end) into the queue under a single lock
  // acquisition, then wakes all waiters. Throws if closed.
  template <typename Iter>
  void push_range(Iter begin, Iter end) {
    {
      std::scoped_lock lock{mut};
      if (closed) {
        throw std::runtime_error("Push into closed channel is a panic");
      }
      for (; begin != end; ++begin) {
        elems.push(std::move(*begin));
      }
    }
    cond.notify_all();
  }
  // Marks the channel closed and wakes all waiters so blocked
  // pop() calls can observe it. Throws if already closed.
  void close() {
    {
      std::scoped_lock lock{mut};
      if (closed) {
        throw std::runtime_error("Closing closed channel is a panic");
      }
      closed = true;
    }
    cond.notify_all();
  }
};
// Owns one thread that drains tasks from a shared channel until
// the channel is closed and empty.
struct Worker {
  // Non-owning: the Pool keeps the channel alive.
  Channel<std::packaged_task<void()>>& chan;
  // Starts immediately on construction. The lambda captures
  // `this`, so a Worker must not be moved after construction.
  std::thread worker_thread{[this]() {
    while (true) {
      if (auto task = chan.pop(); !task) {
        // Channel closed and drained: exit the worker thread.
        return;
      } else {
        (*task)();
      }
    }
  }};
  ~Worker() {
    // Blocks until the thread observes the closed channel.
    worker_thread.join();
  }
};
// A fixed-size (8-thread) pool fed by a single shared channel.
struct Pool {
  Channel<std::packaged_task<void()>> chan;
  // All eight workers share the one channel by reference.
  std::array<Worker, 8> workers{
      {{chan}, {chan}, {chan}, {chan}, {chan}, {chan}, {chan}, {chan}} //
  };
  void submit_task(std::packaged_task<void()> task) {
    chan.push(std::move(task));
  }
  // Submits [begin, end) under a single lock acquisition.
  template <typename Iter>
  void submit_task_range(Iter begin, Iter end) {
    chan.push_range(begin, end);
  }
  ~Pool() {
    // Close first; members are destroyed in reverse declaration
    // order, so the workers join (after draining remaining
    // tasks) before `chan` itself is destroyed.
    chan.close();
  }
};
// Global pool: worker threads start before main() runs and are
// joined during static destruction at exit.
Pool pool;
// Game of Life again, but each generation is split into 8 row
// bands submitted as jobs to the global thread pool.
int main() {
  auto* curr_board = &board1;
  auto* next_board = &board2;
  std::array<std::array<CellT, 3>, 3> glider{{
      {{0, 1, 0}},
      {{0, 0, 1}},
      {{1, 1, 1}},
  }};
  // Seed the glider at the top-left; +1 skips the dead border.
  for (size_t i = 0; i < 3; ++i) {
    for (size_t j = 0; j < 3; ++j) {
      (*curr_board)[i + 1][j + 1] = glider[i][j];
    }
  }
  for (size_t iteration = 0; iteration < 2048; ++iteration) {
    std::vector<std::packaged_task<void()>> tasks;
    // Futures let the main thread block until every job is done.
    std::vector<std::future<void>> futures;
    tasks.reserve(8);
    futures.reserve(8);
    for (size_t job_id = 0; job_id < 8; ++job_id) {
      // Job covers rows [job_id*128 + 1, (job_id+1)*128].
      // (Relies on 8 dividing 1024 exactly.)
      // NOTE(review): `mutable` looks unnecessary — the body does
      // not modify its by-value captures.
      tasks.emplace_back([=]() mutable {
        for (size_t i = job_id * (1024_uz / 8) + 1;
             i < (job_id + 1) * (1024_uz / 8) + 1;
             ++i) {
          for (size_t j = 1; j < 1024_uz + 1; ++j) {
            NeighboursT num_neighbours = neighbours(curr_board, i, j);
            if (num_neighbours == 2) {
              (*next_board)[i][j] = (*curr_board)[i][j];
            } else {
              (*next_board)[i][j] = num_neighbours == 3;
            }
          }
        }
      });
      // Grab the future before the task is moved into the pool.
      futures.push_back(tasks.back().get_future());
    }
    // One lock acquisition for all 8 jobs.
    pool.submit_task_range(tasks.begin(), tasks.end());
    // Barrier: wait for every job before swapping the boards.
    for (const auto& f : futures) {
      f.wait();
    }
    std::swap(curr_board, next_board);
  }
  // Dump the 3x3 patch at the board centre.
  for (size_t i = 512; i < 512 + 3; ++i) {
    for (size_t j = 512; j < 512 + 3; ++j) {
      std::cout << (*curr_board)[i + 1][j + 1] << ' ';
    }
    std::cout << '\n';
  }
  return 0;
}
// Stand-in for the C++23 `uz` literal suffix: converts an
// unsigned long long literal into a size_t.
size_t operator""_uz(unsigned long long int x) {
  return static_cast<size_t>(x);
}
// Calculates the number of live neighbours (out of 8): sums the
// whole 3x3 block around (i, j), then subtracts the centre cell.
// Callers pass i, j >= 1, so i - 1 / j - 1 never wrap.
NeighboursT neighbours(auto* board, size_t i, size_t j) {
  NeighboursT num_neighbours = 0;
  for (size_t ni = i - 1; ni <= i + 1; ++ni) {
    for (size_t nj = j - 1; nj <= j + 1; ++nj) {
      num_neighbours += (*board)[ni][nj];
    }
  }
  // Remove the centre cell: it isn't its own neighbour.
  num_neighbours -= (*board)[i][j];
  return num_neighbours;
}
Let’s benchmark!
$ hyperfine ./*.clang++.out --warmup 5
Benchmark 1: ./demo-loop.clang++.out
Time (mean ± σ): 680.7 ms ± 6.7 ms [User: 678.3 ms, System: 1.6 ms]
Range (min … max): 671.3 ms … 691.1 ms 10 runs
Benchmark 2: ./demo-thread.clang++.out
Time (mean ± σ): 225.3 ms ± 12.7 ms [User: 1281.0 ms, System: 57.9 ms]
Range (min … max): 212.9 ms … 253.9 ms 12 runs
Summary
'./demo-thread.clang++.out' ran
3.02 ± 0.17 times faster than './demo-loop.clang++.out'
We got roughly a 3 times speedup, similar to what we got with OpenMP.
References:
We showed how execution policies allow us to select what algorithm
implementation to use. In previous lectures, we also showed use of
function objects to change the behaviour of stuff like
std::map, or std::sort.
Similarly, allocators can be used with the STL containers to change the allocation and deallocation algorithms used by such containers.
#include <benchmark/benchmark.h>
#include <deque>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <numeric>
#include <ranges>
#include <sstream>
#include <stdexcept>
#include <vector>
// CRTP base providing evaluation and stream (de)serialisation for
// a Lisp-style prefix arithmetic expression tree.
//
// Derived must provide three data members:
//   Op op        — which operation this node performs
//   args         — a sequence of Derived sub-expressions
//   double value — the literal value when op == VALUE
// (The container type of `args` is Derived's choice; this is how
// the allocator variants below swap allocators without changing
// any of the logic here.)
template <typename Derived>
struct ExprCommon {
  enum Op {
    VALUE,   // literal number
    PLUS,    // (+ a b ...)
    MINUS,   // (- a): negation; (- a b ...): subtraction
    TIMES,   // (* a b ...)
    DIVIDE,  // (/ a): reciprocal; (/ a b ...): division
  };
  // Printable character for each Op, indexed by the enum value.
  static constexpr char opcodes[] = {
      '?',
      '+',
      '-',
      '*',
      '/',
  };
  // Recursively evaluates the tree. Throws std::runtime_error on
  // malformed trees (empty '-'/'/' argument lists, bad opcode).
  double evaluate() const {
    const Derived& self = static_cast<const Derived&>(*this);
    switch (self.op) {
      case VALUE: {
        return self.value;
      }
      case PLUS: {
        // Sum of all arguments; empty sum is 0.
        double res = 0.0;
        for (const Derived& e : self.args) {
          res += e.evaluate();
        }
        return res;
      }
      case MINUS: {
        if (self.args.size() == 0) {
          throw std::runtime_error("Bad number of arguments to -");
        }
        if (self.args.size() == 1) {
          // Unary -
          // aka negation
          return -self.args[0].evaluate();
        }
        // First arg minus the remaining self.args
        auto it = self.args.cbegin();
        double res = it->evaluate();
        ++it;
        for (; it != self.args.cend(); ++it) {
          res -= it->evaluate();
        }
        return res;
      }
      case TIMES: {
        // Product of all arguments; empty product is 1.
        double res = 1.0;
        for (const Derived& e : self.args) {
          res *= e.evaluate();
        }
        return res;
      }
      case DIVIDE: {
        if (self.args.size() == 0) {
          throw std::runtime_error("Bad number of arguments to /");
        }
        if (self.args.size() == 1) {
          // Unary /
          // aka reciprocal
          return 1.0 / self.args[0].evaluate();
        }
        // First arg divide the remaining self.args
        auto it = self.args.cbegin();
        double res = it->evaluate();
        ++it;
        for (; it != self.args.cend(); ++it) {
          res /= it->evaluate();
        }
        return res;
      }
      default:
        throw std::runtime_error("Invalid opcode in evaluate");
    }
  }
  // Prints in the same prefix form that operator>> parses.
  friend std::ostream& operator<<(std::ostream& os, const Derived& self) {
    if (self.op == self.VALUE) {
      return os << self.value;
    }
    os << '(' << opcodes[self.op];
    // Fixed: was `self.self.args`, which fails to compile as soon
    // as this operator is instantiated (Derived has no member
    // named `self`).
    for (const Derived& e : self.args) {
      os << ' ' << e;
    }
    os << ')';
    return os;
  }
  // Parses either a bare number or a parenthesised prefix
  // expression `(op arg arg ...)`. Throws on an unknown opcode.
  friend std::istream& operator>>(std::istream& is, Derived& self) {
    is >> std::ws;
    if (is.peek() != '(') {
      // Not a list: must be a numeric literal.
      self.op = self.VALUE;
      is >> self.value;
      return is;
    }
    // Need to parse complex expression
    is.get();
    char op;
    is >> std::ws >> op;
    switch (op) {
      case '+':
        self.op = PLUS;
        break;
      case '-':
        self.op = MINUS;
        break;
      case '*':
        self.op = TIMES;
        break;
      case '/':
        self.op = DIVIDE;
        break;
      default:
        throw std::runtime_error("Invalid opcode");
    }
    // Recursively read arguments until the closing ')'.
    while (true) {
      is >> std::ws;
      if (is.peek() == ')') {
        is.get();
        return is;
      }
      self.args.emplace_back();
      is >> self.args.back();
    }
  }
};
// Baseline expression node: sub-expressions live in a std::vector
// using the default (heap) allocator.
struct Expr : ExprCommon<Expr> {
  Op op = VALUE;
  std::vector<Expr> args{};
  double value = 0.0;
};
// A minimal monotonic (bump) allocator over a single global
// arena: allocate() bumps a cursor, deallocate() is a no-op, and
// release() rewinds the whole arena in one go.
//
// Note: the arena is a static member of a class *template*, so
// each distinct T gets its own independent arena.
template <typename T>
struct GlobalMonotonicAllocator {
  // Arena size in bytes (was a magic number repeated twice).
  static constexpr size_t buffer_size = 200'000;
  static inline char buffer[buffer_size];
  static inline char* next_allocation = buffer;
  using value_type = T;
  // Bump-allocates space for n objects of T.
  // Fixed: the bounds check now happens *before* advancing the
  // cursor (the old version formed an out-of-bounds pointer
  // first), and std::align rounds the cursor up to alignof(T)
  // instead of assuming the current offset is suitably aligned.
  T* allocate(size_t n) {
    // Guard against sizeof(T) * n wrapping around.
    if (n > buffer_size / sizeof(T)) {
      throw std::bad_alloc();
    }
    size_t bytes = sizeof(T) * n;
    void* candidate = next_allocation;
    size_t space = static_cast<size_t>(buffer + buffer_size - next_allocation);
    if (std::align(alignof(T), bytes, candidate, space) == nullptr) {
      throw std::bad_alloc();
    }
    next_allocation = static_cast<char*>(candidate) + bytes;
    return static_cast<T*>(candidate);
  }
  void deallocate(T*, size_t) {
    // Monotonic means deallocate is no-op
  }
  static void release() {
    // deallocate all allocations so far in one go
    next_allocation = buffer;
  }
  // Stateless: all instances share the one static arena, so any
  // two allocators of the same T are interchangeable.
  friend bool operator==(const GlobalMonotonicAllocator&,
                         const GlobalMonotonicAllocator&) {
    return true;
  }
};
// Expression node whose args vector allocates from the global
// monotonic arena instead of the heap. No extra constructors are
// needed because the allocator is stateless.
struct GlobalMonotonicExpr : ExprCommon<GlobalMonotonicExpr> {
  Op op = VALUE;
  std::vector<GlobalMonotonicExpr,
              GlobalMonotonicAllocator<GlobalMonotonicExpr>>
      args{};
  double value = 0.0;
};
// A monotonic bump allocator whose arena is held by pointer, so
// different allocator objects can use different buffers (unlike
// GlobalMonotonicAllocator, whose arena is a static member).
template <typename T>
struct StatefulMonotonicAllocator {
  // The arena: [begin, end), with next_allocation as the cursor.
  struct Buffer {
    char* begin;
    char* end;
    char* next_allocation;
  };
  // Non-owning: the caller keeps the Buffer (and its memory)
  // alive for the lifetime of the allocator.
  Buffer* buffer;
  using value_type = T;
  T* allocate(size_t n) {
    // NOTE(review): the bound is checked only *after* advancing
    // the cursor, and no alignment adjustment is made for T —
    // this relies on sequential same-T allocations staying
    // suitably aligned; confirm.
    T* ptr = reinterpret_cast<T*>(buffer->next_allocation);
    buffer->next_allocation += sizeof(T) * n;
    if (buffer->next_allocation > buffer->end) {
      throw std::bad_alloc();
    }
    return ptr;
  }
  void deallocate(T*, size_t) {
    // Monotonic means deallocate is no-op
  }
  void release() {
    // deallocate all allocations so far in one go
    buffer->next_allocation = buffer->begin;
  }
  // Defaulted: two allocators compare equal iff they point at the
  // same Buffer, i.e. memory from one can be handled by the other.
  friend bool operator==(const StatefulMonotonicAllocator&,
                         const StatefulMonotonicAllocator&) = default;
  // Also, when a vector tries to emplace back, it will ask our allocator
  // to construct. Here's our chance to copy over the allocator.
  template <typename... Args>
  void construct(T* ptr, Args&&... args) {
    // Fancy C++20 version
    // Constructs T at ptr, threading *this through uses-allocator
    // construction so nested containers inherit this allocator.
    std::uninitialized_construct_using_allocator(
        ptr, *this, std::forward<Args>(args)...);
    // Cursed pre-C++20 version
    // Don't do drugs, kids
    // std::apply(
    //     [=]<typename... Xs>(Xs&&... xs) {
    //       std::construct_at(ptr, std::forward<Xs>(xs)...);
    //     },
    //     std::uses_allocator_construction_args<T>(
    //         *this, std::forward<Args>(args)...));
  }
};
// Expression node whose args vector uses the stateful monotonic
// allocator. The (T, allocator) constructor overloads implement
// the uses-allocator protocol so the allocator propagates down
// the whole expression tree.
struct StatefulMonotonicExpr : ExprCommon<StatefulMonotonicExpr> {
  using allocator_type =
      StatefulMonotonicAllocator<StatefulMonotonicExpr>;
  Op op = VALUE;
  std::vector<StatefulMonotonicExpr, allocator_type> args{};
  double value = 0.0;
  // We need to forward the allocator to the inner vector of exprs
  StatefulMonotonicExpr() = default;
  // Allocator-extended default construction.
  explicit StatefulMonotonicExpr(allocator_type alloc)
      : op{VALUE}, args{alloc} {}
  StatefulMonotonicExpr(const StatefulMonotonicExpr&) = default;
  StatefulMonotonicExpr(StatefulMonotonicExpr&&) = default;
  // Allocator-extended copy: the new args vector uses `alloc`.
  StatefulMonotonicExpr(const StatefulMonotonicExpr& other,
                        allocator_type alloc)
      : op{other.op}, args{other.args, alloc}, value{other.value} {}
  // Allocator-extended move.
  StatefulMonotonicExpr(StatefulMonotonicExpr&& other,
                        allocator_type alloc)
      : op{other.op},
        args{std::move(other.args), alloc},
        value{other.value} {}
  StatefulMonotonicExpr& operator=(const StatefulMonotonicExpr&) =
      default;
  StatefulMonotonicExpr& operator=(StatefulMonotonicExpr&&) = default;
  // Sanity check: the uses-allocator protocol must kick in for
  // this type, or the allocator would silently not propagate.
  static_assert(
      std::uses_allocator_v<StatefulMonotonicExpr, allocator_type>);
};
static void BM_Default(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
Expr e{};
ss.str(s);
ss >> e;
Expr copy{e};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Default);
static void BM_GlobalMonotonic(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
GlobalMonotonicExpr e;
ss.str(s);
ss >> e;
GlobalMonotonicExpr copy{e};
benchmark::DoNotOptimize(copy.evaluate());
GlobalMonotonicAllocator<GlobalMonotonicExpr>::release();
}
}
}
BENCHMARK(BM_GlobalMonotonic);
static void BM_LocalMonotonic(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<char[]>(200'000);
StatefulMonotonicAllocator<StatefulMonotonicExpr>::Buffer buf{
pbuf.get(), pbuf.get() + 200'000, pbuf.get()};
StatefulMonotonicAllocator<StatefulMonotonicExpr> alloc{&buf};
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
StatefulMonotonicExpr e{alloc};
ss.str(s);
ss >> e;
StatefulMonotonicExpr copy{e, alloc};
benchmark::DoNotOptimize(copy.evaluate());
alloc.release();
}
}
}
BENCHMARK(BM_LocalMonotonic);
BENCHMARK_MAIN();
        #include <benchmark/benchmark.h>
        #include <array>
        #include <deque>
        #include <fstream>
        #include <iostream>
        #include <memory>
        #include <memory_resource>
        #include <mutex>
        #include <new>
        #include <numeric>
        #include <ranges>
        #include <sstream>
        #include <stdexcept>
        #include <vector>
struct Expr {
enum Op {
VALUE,
PLUS,
MINUS,
TIMES,
DIVIDE,
} op = VALUE;
std::vector<Expr> args{};
double value = 0.0;
double evaluate() const {
switch (op) {
case VALUE: {
return value;
}
case PLUS: {
double res = 0.0;
for (const Expr& e : args) {
res += e.evaluate();
}
return res;
}
case MINUS: {
if (args.size() == 0) {
throw std::runtime_error("Bad number of arguments to -");
}
if (args.size() == 1) {
// Unary -
// aka negation
return -args[0].evaluate();
}
// First arg minus the remaining args
auto it = args.cbegin();
double res = it->evaluate();
++it;
for (; it != args.cend(); ++it) {
res -= it->evaluate();
}
return res;
}
case TIMES: {
double res = 1.0;
for (const Expr& e : args) {
res *= e.evaluate();
}
return res;
}
case DIVIDE: {
if (args.size() == 0) {
throw std::runtime_error("Bad number of arguments to /");
}
if (args.size() == 1) {
// Unary /
// aka reciprocal
return 1.0 / args[0].evaluate();
}
// First arg divide the remaining args
auto it = args.cbegin();
double res = it->evaluate();
++it;
for (; it != args.cend(); ++it) {
res /= it->evaluate();
}
return res;
}
default:
throw std::runtime_error("Invalid opcode in evaluate");
}
}
static constexpr char opcodes[] = {
'?',
'+',
'-',
'*',
'/',
};
friend std::ostream& operator<<(std::ostream& os, const Expr& self) {
if (self.op == VALUE) {
return os << self.value;
}
os << '(' << opcodes[self.op];
for (const Expr& e : self.args) {
os << ' ' << e;
}
os << ')';
return os;
}
friend std::istream& operator>>(std::istream& is, Expr& self) {
is >> std::ws;
if (is.peek() != '(') {
self.op = VALUE;
is >> self.value;
return is;
}
// Need to parse complex expression
is.get();
char op;
is >> std::ws >> op;
switch (op) {
case '+':
self.op = PLUS;
break;
case '-':
self.op = MINUS;
break;
case '*':
self.op = TIMES;
break;
case '/':
self.op = DIVIDE;
break;
default:
throw std::runtime_error("Invalid opcode");
}
while (true) {
is >> std::ws;
if (is.peek() == ')') {
is.get();
return is;
}
self.args.emplace_back();
is >> self.args.back();
}
}
};
struct PMRExpr {
enum Op {
VALUE,
PLUS,
MINUS,
TIMES,
DIVIDE,
} op;
std::pmr::vector<PMRExpr> args;
double value;
using allocator_type = std::pmr::polymorphic_allocator<Expr>;
// Default ctor and allocator aware version
PMRExpr() : op{VALUE}, args{}, value{0.0} {}
explicit PMRExpr(const allocator_type& alloc)
: op{VALUE}, args{alloc}, value{0.0} {}
// Note the explicit!
// We don't want allocators to implicitly convert to Exprs.
// Without explicit, the following would compile:
//
// std::pmr::monotonic_buffer_resource resource;
// std::pmr::polymorphic_allocator<Expr> alloc{&resource};
// Expr e = alloc;
// Copy and allocator aware version
PMRExpr(const PMRExpr& other) = default;
PMRExpr& operator=(const PMRExpr& other) = default;
PMRExpr(const PMRExpr& other, const allocator_type& alloc)
: op{other.op}, args{other.args, alloc}, value{other.value} {}
// Move and allocator aware version
PMRExpr(PMRExpr&& other) = default;
PMRExpr& operator=(PMRExpr&& other) = default;
PMRExpr(PMRExpr&& other, const allocator_type& alloc)
: op{std::move(other.op)},
args{std::move(other.args), alloc},
value{std::move(other.value)} {}
double evaluate() const {
switch (op) {
case VALUE: {
return value;
}
case PLUS: {
double res = 0.0;
for (const PMRExpr& e : args) {
res += e.evaluate();
}
return res;
}
case MINUS: {
if (args.size() == 0) {
throw std::runtime_error("Bad number of arguments to -");
}
if (args.size() == 1) {
// Unary -
// aka negation
return -args[0].evaluate();
}
// First arg minus the remaining args
auto it = args.cbegin();
double res = it->evaluate();
++it;
for (; it != args.cend(); ++it) {
res -= it->evaluate();
}
return res;
}
case TIMES: {
double res = 1.0;
for (const PMRExpr& e : args) {
res *= e.evaluate();
}
return res;
}
case DIVIDE: {
if (args.size() == 0) {
throw std::runtime_error("Bad number of arguments to /");
}
if (args.size() == 1) {
// Unary /
// aka reciprocal
return 1.0 / args[0].evaluate();
}
// First arg divide the remaining args
auto it = args.cbegin();
double res = it->evaluate();
++it;
for (; it != args.cend(); ++it) {
res /= it->evaluate();
}
return res;
}
default:
throw std::runtime_error("Invalid opcode in evaluate");
}
}
static constexpr char opcodes[] = {
'?',
'+',
'-',
'*',
'/',
};
friend std::ostream& operator<<(std::ostream& os, const PMRExpr& self) {
if (self.op == VALUE) {
return os << self.value;
}
os << '(' << opcodes[self.op];
for (const PMRExpr& e : self.args) {
os << ' ' << e;
}
os << ')';
return os;
}
friend std::istream& operator>>(std::istream& is, PMRExpr& self) {
is >> std::ws;
if (is.peek() != '(') {
self.op = VALUE;
is >> self.value;
return is;
}
// Need to parse complex expression
is.get();
char op;
is >> std::ws >> op;
switch (op) {
case '+':
self.op = PLUS;
break;
case '-':
self.op = MINUS;
break;
case '*':
self.op = TIMES;
break;
case '/':
self.op = DIVIDE;
break;
default:
throw std::runtime_error("Invalid opcode");
}
while (true) {
is >> std::ws;
if (is.peek() == ')') {
is.get();
return is;
}
self.args.emplace_back();
is >> self.args.back();
}
}
};
struct NoDestroyPMRExpr {
enum Op {
VALUE,
PLUS,
MINUS,
TIMES,
DIVIDE,
} op;
alignas(alignof(std::pmr::vector<NoDestroyPMRExpr>)) char args_storage
[sizeof(std::pmr::vector<NoDestroyPMRExpr>)];
std::pmr::vector<NoDestroyPMRExpr>& args() {
return reinterpret_cast<std::pmr::vector<NoDestroyPMRExpr>&>(
args_storage);
};
const std::pmr::vector<NoDestroyPMRExpr>& args() const {
return reinterpret_cast<const std::pmr::vector<NoDestroyPMRExpr>&>(
args_storage);
};
double value;
using allocator_type = std::pmr::polymorphic_allocator<Expr>;
// Default ctor and allocator aware version
NoDestroyPMRExpr() : op{VALUE}, value{0.0} {
std::construct_at(&args());
}
explicit NoDestroyPMRExpr(const allocator_type& alloc)
: op{VALUE}, value{0.0} {
std::construct_at(&args(), alloc);
}
// Note the explicit!
// We don't want allocators to implicitly convert to Exprs.
// Without explicit, the following would compile:
//
// std::pmr::monotonic_buffer_resource resource;
// std::pmr::polymorphic_allocator<Expr> alloc{&resource};
// Expr e = alloc;
// Copy and allocator aware version
NoDestroyPMRExpr(const NoDestroyPMRExpr& other) = default;
NoDestroyPMRExpr& operator=(const NoDestroyPMRExpr& other) = default;
NoDestroyPMRExpr(const NoDestroyPMRExpr& other,
const allocator_type& alloc)
: op{other.op}, value{other.value} {
std::construct_at(&args(), other.args(), alloc);
}
// Move and allocator aware version
NoDestroyPMRExpr(NoDestroyPMRExpr&& other) = default;
NoDestroyPMRExpr& operator=(NoDestroyPMRExpr&& other) = default;
NoDestroyPMRExpr(NoDestroyPMRExpr&& other, const allocator_type& alloc)
: op{std::move(other.op)}, value{std::move(other.value)} {
std::construct_at(&args(), std::move(other.args()), alloc);
}
double evaluate() const {
switch (op) {
case VALUE: {
return value;
}
case PLUS: {
double res = 0.0;
for (const NoDestroyPMRExpr& e : args()) {
res += e.evaluate();
}
return res;
}
case MINUS: {
if (args().size() == 0) {
throw std::runtime_error("Bad number of arguments to -");
}
if (args().size() == 1) {
// Unary -
// aka negation
return -args()[0].evaluate();
}
// First arg minus the remaining args()
auto it = args().cbegin();
double res = it->evaluate();
++it;
for (; it != args().cend(); ++it) {
res -= it->evaluate();
}
return res;
}
case TIMES: {
double res = 1.0;
for (const NoDestroyPMRExpr& e : args()) {
res *= e.evaluate();
}
return res;
}
case DIVIDE: {
if (args().size() == 0) {
throw std::runtime_error("Bad number of arguments to /");
}
if (args().size() == 1) {
// Unary /
// aka reciprocal
return 1.0 / args()[0].evaluate();
}
// First arg divide the remaining args()
auto it = args().cbegin();
double res = it->evaluate();
++it;
for (; it != args().cend(); ++it) {
res /= it->evaluate();
}
return res;
}
default:
throw std::runtime_error("Invalid opcode in evaluate");
}
}
static constexpr char opcodes[] = {
'?',
'+',
'-',
'*',
'/',
};
friend std::ostream& operator<<(std::ostream& os,
const NoDestroyPMRExpr& self) {
if (self.op == VALUE) {
return os << self.value;
}
os << '(' << opcodes[self.op];
for (const NoDestroyPMRExpr& e : self.args()) {
os << ' ' << e;
}
os << ')';
return os;
}
friend std::istream& operator>>(std::istream& is,
NoDestroyPMRExpr& self) {
is >> std::ws;
if (is.peek() != '(') {
self.op = VALUE;
is >> self.value;
return is;
}
// Need to parse complex expression
is.get();
char op;
is >> std::ws >> op;
switch (op) {
case '+':
self.op = PLUS;
break;
case '-':
self.op = MINUS;
break;
case '*':
self.op = TIMES;
break;
case '/':
self.op = DIVIDE;
break;
default:
throw std::runtime_error("Invalid opcode");
}
while (true) {
is >> std::ws;
if (is.peek() == ')') {
is.get();
return is;
}
self.args().emplace_back();
is >> self.args().back();
}
}
};
static void BM_Default(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
Expr e{};
ss.str(s);
ss >> e;
Expr copy{e};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Default);
static void BM_Monotonic_1M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 1'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Monotonic_1M);
static void BM_Monotonic_2M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 2'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Monotonic_2M);
static void BM_Monotonic_4M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 4'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Monotonic_4M);
static void BM_Monotonic_8M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 8'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Monotonic_8M);
static void BM_Monotonic_12M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 12'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Monotonic_12M);
static void BM_MonotonicNoDestroyTop_1M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 1'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
std::pmr::polymorphic_allocator<Expr> alloc{&resource};
// This way, we allocate a PMRExpr into the buffer resource,
// but we DON'T properly `delete` it!
// This is fine in this very specific case as we're making use
// of certain assumptions about how std::vector is implemented,
// namely that it's ok if the destructor isn't called if all the
// subobjects will be released when the allocator goes out of
// scope.
PMRExpr* pe = alloc.allocate_object<PMRExpr>();
std::construct_at(pe, &resource);
PMRExpr& e = *pe;
ss.str(s);
ss >> e;
PMRExpr* pcopy = alloc.allocate_object<PMRExpr>();
std::construct_at(pcopy, e, &resource);
PMRExpr& copy = *pcopy;
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicNoDestroyTop_1M);
static void BM_MonotonicNoDestroyTop_8M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 8'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
std::pmr::polymorphic_allocator<Expr> alloc{&resource};
// This way, we allocate a PMRExpr into the buffer resource,
// but we DON'T properly `delete` it!
// This is fine in this very specific case as we're making use
// of certain assumptions about how std::vector is implemented,
// namely that it's ok if the destructor isn't called if all the
// subobjects will be released when the allocator goes out of
// scope.
PMRExpr* pe = alloc.allocate_object<PMRExpr>();
std::construct_at(pe, &resource);
PMRExpr& e = *pe;
ss.str(s);
ss >> e;
PMRExpr* pcopy = alloc.allocate_object<PMRExpr>();
std::construct_at(pcopy, e, &resource);
PMRExpr& copy = *pcopy;
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicNoDestroyTop_8M);
static void BM_MonotonicNoDestroy_1M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 1'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
NoDestroyPMRExpr e{&resource};
ss.str(s);
ss >> e;
NoDestroyPMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicNoDestroy_1M);
static void BM_MonotonicNoDestroy_8M(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 8'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource resource{buf.begin(),
buf.size()};
NoDestroyPMRExpr e{&resource};
ss.str(s);
ss >> e;
NoDestroyPMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicNoDestroy_8M);
static void BM_Pool(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::unsynchronized_pool_resource resource{};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_Pool);
static void BM_MonotonicPool(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 8'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource buf_resource{buf.begin(),
buf.size()};
std::pmr::unsynchronized_pool_resource resource{&buf_resource};
PMRExpr e{&resource};
ss.str(s);
ss >> e;
PMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicPool);
static void BM_MonotonicPoolNoDestroy(benchmark::State& state) {
std::ostringstream sstr;
sstr << std::ifstream("test-0.700.in").rdbuf();
std::string s = sstr.str();
std::unique_ptr pbuf = std::make_unique<std::array<char, 8'000'000>>();
auto& buf = *pbuf;
for (auto _ : state) {
std::istringstream ss;
for (size_t i = 0; i < 1024; ++i) {
std::pmr::monotonic_buffer_resource buf_resource{buf.begin(),
buf.size()};
std::pmr::unsynchronized_pool_resource resource{&buf_resource};
NoDestroyPMRExpr e{&resource};
ss.str(s);
ss >> e;
NoDestroyPMRExpr copy{e, &resource};
benchmark::DoNotOptimize(copy.evaluate());
}
}
}
BENCHMARK(BM_MonotonicPoolNoDestroy);
BENCHMARK_MAIN();
        © 21 July 2022, Thomas Tan, All Rights Reserved