C++ Move Semantics

Written by:

Last updated: 30 June 2022

1 Introduction to move semantics

Full godbolt link

Full code snippet for this section
// main.cpp

#include <cassert>
#include <initializer_list>
#include <iostream>
#include <vector>

struct Copy {
  Copy() : x(0) {}

  Copy(const Copy&) : x(1) {
    std::cout << "copy constructor\n";
  }

  Copy& operator=(const Copy& other) {
    std::cout << "copy assign\n";
    Copy copy{other};
    this->swap(copy);
    return *this;
  }

  ~Copy() {}

  void swap(Copy& other) {
    std::swap(other.x, this->x);
  }
  int x;
};

template <typename ElemTy>
struct SimpleVectorOrig {
private:
  ElemTy* m_buffer;
  size_t m_size;

public:
  SimpleVectorOrig() : m_buffer(nullptr), m_size(0) {}
  SimpleVectorOrig(const SimpleVectorOrig& other) : m_size(other.m_size) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < m_size; i++) {
      m_buffer[i] = other.m_buffer[i];
    }
  }
  SimpleVectorOrig(std::initializer_list<ElemTy> init_list)
      : m_size(init_list.size()) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < init_list.size(); i++) {
      m_buffer[i] = std::data(init_list)[i];
    }
  }
  ~SimpleVectorOrig() {
    delete[] m_buffer;
  }

  SimpleVectorOrig& operator=(const SimpleVectorOrig& other) {
    SimpleVectorOrig copy{other};
    this->swap(copy);

    return *this;
  }

  void push_back(const ElemTy& element) {
    ElemTy* new_buffer = new ElemTy[m_size + 1];
    for (size_t i = 0; i < m_size; i++) {
      new_buffer[i] = m_buffer[i];
    }
    new_buffer[m_size] = element;
    m_size += 1;

    delete[] m_buffer;
    m_buffer = new_buffer;
  }
  ElemTy pop_back() {
    assert(m_size > 0);
    ElemTy* new_buffer = new ElemTy[m_size - 1];
    for (size_t i = 0; i < m_size - 1; i++) {
      new_buffer[i] = m_buffer[i];
    }

    ElemTy last_elem = m_buffer[m_size - 1];
    m_size -= 1;

    delete[] m_buffer;
    m_buffer = new_buffer;

    return last_elem;
  }
  size_t size() const {
    return m_size;
  }
  ElemTy& operator[](size_t idx) {
    return m_buffer[idx];
  }
  const ElemTy& operator[](size_t idx) const {
    return m_buffer[idx];
  }
  void swap(SimpleVectorOrig& other) {
    std::swap(m_buffer, other.m_buffer);
    std::swap(m_size, other.m_size);
  }
  friend void swap(SimpleVectorOrig& a, SimpleVectorOrig& b) {
    a.swap(b);
  }
  friend std::ostream& operator<<(std::ostream& os,
                                  SimpleVectorOrig& vec) {
    os << "[";
    for (size_t i = 0; i < vec.size(); i++) {
      if (i > 0) {
        os << ", ";
      }
      os << vec[i];
    }
    os << "]";
    return os;
  }
};

struct Move {
  Move() : x(0) {}

  Move(const Move&) : x(1) {
    std::cout << "copy constructor\n";
  }
  Move(Move&&) : x(2) {
    std::cout << "move constructor\n";
  }

  Move& operator=(const Move& other) {
    std::cout << "copy assign\n";
    Move copy{other};
    this->swap(copy);
    return *this;
  }

  Move& operator=(Move&& other) {
    std::cout << "move assign\n";
    this->swap(other);
    return *this;
  }

  ~Move() {}

  void swap(Move& other) {
    std::swap(other.x, this->x);
  }
  int x;
};

#include <chrono>

void take_value(Move m) {
  (void) m;
  std::cout << "took\n";
}

struct Foozle {
  int x;
};
void take_rvalue_ref(Foozle&& x) {
  std::cout << "x = "  //
            << x.x << "\n";
}

int main() {
  SimpleVectorOrig<Copy> copies;
  std::cout << "initial size: "  //
            << copies.size() << "\n\n";

  std::cout << "<1\n";
  std::cout << "pushing 1:\n";
  copies.push_back(Copy{});

  std::cout << "\npushing 2:\n";
  copies.push_back(Copy{});
  std::cout << "1>\n";

  struct NoMove {
    NoMove() {
      xs.resize(100);
    }
    NoMove(const NoMove&) = default;
    NoMove& operator=(const NoMove&) = default;

    std::vector<int> xs;
  };

  struct Movable {
    Movable() {  //
      xs.resize(100);
    }
    std::vector<int> xs;
  };

  {
    std::cout << "<2\n";
    namespace sc = std::chrono;
    using ms = sc::milliseconds;

    auto start = sc::system_clock::now();
    std::vector<NoMove> cant_move_me{};
    for (size_t i = 0; i < 50000; i++) {
      cant_move_me.push_back(NoMove{});
    }
    std::cout
        << "elapsed (not-movable) = "
        << sc::duration_cast<ms>(sc::system_clock::now() - start).count()
        << " ms\n";

    start = sc::system_clock::now();
    std::vector<Movable> can_move_me{};

    for (size_t i = 0; i < 50000; i++) {
      can_move_me.push_back(Movable{});
    }
    std::cout
        << "elapsed (movable) = "
        << sc::duration_cast<ms>(sc::system_clock::now() - start).count()
        << " ms\n";
    std::cout << "2>\n";
  }

  {
    std::cout << "<3\n";
    Move m1{};
    Move m2{};

    std::cout << "copies:\n";
    Move m3 = m1;
    m2 = m1;

    std::cout << "\n";
    std::cout << "moves:\n";
    Move m4 = std::move(m2);
    m2 = std::move(m1);

    std::cout << "3>\n";
  }

  {
    std::vector xs{Move{},  //
                   Move{},
                   Move{}};

    std::cout << "<4\n"
              << "copying:\n";
    auto copy = xs;

    std::cout << "\n";
    std::cout << "moving:\n";
    auto move = std::move(xs);

    std::cout << "\n";

    std::cout << "4>\n";
  }

  {
    std::cout << "<5\n";

    Move m{};

    std::cout << "copy:\n";
    take_value(m);

    std::cout << "\n"
              << "move:\n";
    take_value(std::move(m));

    std::cout << "5>\n";
  }

  {
    // `Foozle{}` is a temporary here
    take_rvalue_ref(Foozle{100});

    // note: Foozle&& cannot bind
    // to lvalue-references!
    auto f = Foozle{2};
    // compile error:
    // take_rvalue_ref(f);
    (void) f;
  }

  {
    auto f = Foozle{100};
    take_rvalue_ref(static_cast<Foozle&&>(f));
  }
}

This entire lecture will be focusing on move semantics; why we should use move, how to use movable classes, and how to make your own classes movable.

1.1 Ownership

Recall that in Lecture 3, we discussed the concept of ownership, and how classes can be written with destructors and overloaded copy constructors and assignment operators to implement correct ownership semantics (eg. of heap memory). We also saw how the Rule of 0/3 comes into play here.

Once we understand the concept of ownership, move semantics can be simply summed up as transferring ownership from one instance of a class to another.

1.2 Motivation behind moving

To answer the question of why we want to be able to move things, let’s start with a simple class that just lets us see the number of times something has been copied (either via copy constructor, or copy assignment operator):

struct Copy {
  Copy() : x(0) {}

  Copy(const Copy&) : x(1) {
    std::cout << "copy constructor\n";
  }

  Copy& operator=(const Copy& other) {
    std::cout << "copy assign\n";
    Copy copy{other};
    this->swap(copy);
    return *this;
  }

  ~Copy() {}

  void swap(Copy& other) {
    std::swap(other.x, this->x);
  }
  int x;
};
Snippet 1: A simple class that just prints something when it’s copied

1.2.1 Investigating SimpleVector

Next, let’s revisit the SimpleVector class we implemented in Lecture 4. Recall that we had to copy-assign elements from the old buffer to the new buffer whenever we wanted to resize the vector. For example, SimpleVector::push_back looks like this:

  void push_back(const ElemTy& element) {
    ElemTy* new_buffer = new ElemTy[m_size + 1];
    for (size_t i = 0; i < m_size; i++) {
      new_buffer[i] = m_buffer[i];
    }
    new_buffer[m_size] = element;
    m_size += 1;

    delete[] m_buffer;
    m_buffer = new_buffer;
  }
Snippet 2: The implementation of SimpleVector::push_back
Full implementation of SimpleVector

For completeness, we reproduce the full implementation of SimpleVector from Lecture 3 here:

template <typename ElemTy>
struct SimpleVectorOrig {
private:
  ElemTy* m_buffer;
  size_t m_size;

public:
  SimpleVectorOrig() : m_buffer(nullptr), m_size(0) {}
  SimpleVectorOrig(const SimpleVectorOrig& other) : m_size(other.m_size) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < m_size; i++) {
      m_buffer[i] = other.m_buffer[i];
    }
  }
  SimpleVectorOrig(std::initializer_list<ElemTy> init_list)
      : m_size(init_list.size()) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < init_list.size(); i++) {
      m_buffer[i] = std::data(init_list)[i];
    }
  }
  ~SimpleVectorOrig() {
    delete[] m_buffer;
  }

  SimpleVectorOrig& operator=(const SimpleVectorOrig& other) {
    SimpleVectorOrig copy{other};
    this->swap(copy);

    return *this;
  }

  void push_back(const ElemTy& element) {
    ElemTy* new_buffer = new ElemTy[m_size + 1];
    for (size_t i = 0; i < m_size; i++) {
      new_buffer[i] = m_buffer[i];
    }
    new_buffer[m_size] = element;
    m_size += 1;

    delete[] m_buffer;
    m_buffer = new_buffer;
  }
  ElemTy pop_back() {
    assert(m_size > 0);
    ElemTy* new_buffer = new ElemTy[m_size - 1];
    for (size_t i = 0; i < m_size - 1; i++) {
      new_buffer[i] = m_buffer[i];
    }

    ElemTy last_elem = m_buffer[m_size - 1];
    m_size -= 1;

    delete[] m_buffer;
    m_buffer = new_buffer;

    return last_elem;
  }
  size_t size() const {
    return m_size;
  }
  ElemTy& operator[](size_t idx) {
    return m_buffer[idx];
  }
  const ElemTy& operator[](size_t idx) const {
    return m_buffer[idx];
  }
  void swap(SimpleVectorOrig& other) {
    std::swap(m_buffer, other.m_buffer);
    std::swap(m_size, other.m_size);
  }
  friend void swap(SimpleVectorOrig& a, SimpleVectorOrig& b) {
    a.swap(b);
  }
  friend std::ostream& operator<<(std::ostream& os,
                                  SimpleVectorOrig& vec) {
    os << "[";
    for (size_t i = 0; i < vec.size(); i++) {
      if (i > 0) {
        os << ", ";
      }
      os << vec[i];
    }
    os << "]";
    return os;
  }
};

When we do new_buffer[i] = m_buffer[i], that is performing copy assignment. Let’s start by making an empty SimpleVector of Copy:

  SimpleVectorOrig<Copy> copies;
  std::cout << "initial size: "  //
            << copies.size() << "\n\n";
$ ./main.out | grep 'initial'
initial size: 0
Snippet 3: Creating a small SimpleVector

That was pretty simple, so let’s start adding some elements:

  std::cout << "pushing 1:\n";
  copies.push_back(Copy{});

  std::cout << "\npushing 2:\n";
  copies.push_back(Copy{});
$ ./main.out | sed -n '1,/<1/d;/1>/q;p'
pushing 1:
copy assign
copy constructor
pushing 2:
copy assign
copy constructor
copy assign
copy constructor
Snippet 4: Adding elements to our SimpleVector of Copy

Looking at the first push_back ("pushing back 1"), we see one copy-constructor call and one copy-assignment call; this is expected, because we used the copy-and-swap idiom when implementing the copy-assignment operator for Copy.

For the second push_back, we already have one element in the vector; we see two sets of copies, one for the existing element (we copy it from the old to the new buffer) and one for the new element.

1.2.2 Performance of copying

If we think about it, why do we need to perform a copy of the existing element? Why don’t we simply… move the element from the old buffer to the new one?

The main use case for this is when the contained type isn’t a cheap struct that contains a simple int, but rather something like another std::vector that takes non-trivial time and memory to copy. In such situations we definitely do not want to make pointless copies of the element type.

Let’s see how much of a difference this can make. Let’s start with defining two types, one of which is movable and one of which is not movable:

  struct NoMove {
    NoMove() {
      xs.resize(100);
    }
    NoMove(const NoMove&) = default;
    NoMove& operator=(const NoMove&) = default;

    std::vector<int> xs;
  };
  struct Movable {
    Movable() {  //
      xs.resize(100);
    }
    std::vector<int> xs;
  };
Snippet 5: Defining a non-movable and movable type
Making a type non-movable

Typically it doesn’t make sense to explicitly design a type that can be copied but not moved, but we’re illustrating a point here so we have to do it.

The way we make it happen is by declaring a copy-constructor and copy-assignment operator, but just make them defaulted so the compiler generates the implementation — what we want here is to actually disable the implicitly declared move-constructor and move-assignment operator.

Note that this is different from explicitly deleting them, eg:

NoMove(NoMove&&) = delete;

which would prevent even copy construction from working correctly, since initialization from a prvalue or xvalue would select the deleted function which results in a compile error. If the implicitly declared (or explicitly defaulted) move constructor is (implicitly) deleted, then it is ignored by overload resolution, which is what we want.

The implicitly declared move constructor is deleted if the class contains non-move-constructible types, or if the copy constructor is explicitly declared (whether defaulted or not). See the cppreference page for more information.

Next, we just have some code that repeatedly calls push_back into a vector of these two types, and measures the time taken.

    auto start = sc::system_clock::now();
    std::vector<NoMove> cant_move_me{};
    for (size_t i = 0; i < 50000; i++) {
      cant_move_me.push_back(NoMove{});
    }
    std::cout
        << "elapsed (not-movable) = "
        << sc::duration_cast<ms>(sc::system_clock::now() - start).count()
        << " ms\n";

    start = sc::system_clock::now();
    std::vector<Movable> can_move_me{};

    for (size_t i = 0; i < 50000; i++) {
      can_move_me.push_back(Movable{});
    }
    std::cout
        << "elapsed (movable) = "
        << sc::duration_cast<ms>(sc::system_clock::now() - start).count()
        << " ms\n";
Snippet 6: Some code that does a simple benchmark on inserting movable vs non-movable types into std::vector

If we run it, we clearly see that the performance with movable types is a lot better than with a non-movable type:

$ ./main.O3.out | sed -n '1,/<2/d;/2>/q;p'
elapsed (not-movable) = 38 ms
elapsed (movable) = 16 ms

Why is this? The simple reason is that moving a std::vector is very cheap, but copying a std::vector is very expensive. For our non-movable type (NoMove), we are forced to perform a copy of the std::vector field, whereas our Movable type can simply perform a move.

1.3 Using move semantics

Before we try to implement move semantics for our SimpleVector, let’s try to understand how to use them first. The basic idea is to simply call std::move, and you can think of that as turning an object into something that can be moved from.

First, let’s augment our Copy type into a Move type that supports moving:

struct Move {
  Move() : x(0) {}

  Move(const Move&) : x(1) {
    std::cout << "copy constructor\n";
  }
  Move(Move&&) : x(2) {
    std::cout << "move constructor\n";
  }

  Move& operator=(const Move& other) {
    std::cout << "copy assign\n";
    Move copy{other};
    this->swap(copy);
    return *this;
  }

  Move& operator=(Move&& other) {
    std::cout << "move assign\n";
    this->swap(other);
    return *this;
  }

  ~Move() {}

  void swap(Move& other) {
    std::swap(other.x, this->x);
  }
  int x;
};
Snippet 7: A Move struct that is very similar to our Copy struct, but supports moving

Don’t worry too much about the details for now — we’ll talk about what exactly they mean later.

1.3.1 Simple moving

First, let’s just try moving our type around:

    Move m1{};
    Move m2{};

    std::cout << "copies:\n";
    Move m3 = m1;
    m2 = m1;

    std::cout << "\n";
    std::cout << "moves:\n";
    Move m4 = std::move(m2);
    m2 = std::move(m1);
$ ./main.out | sed -n '1,/<3/d;/3>/q;p'
copies:
copy constructor
copy assign
copy constructor
moves:
move constructor
move assign
Snippet 8: Moving our Move type around

As we can see, for the move part we get exactly one move constructor call, and one move assignment call (as explained previously for the copy, we use copy-and-swap so there’s an extra copy constructor call).

In order to make an object movable, we simply call std::move with it. This function simply performs a cast of its argument into a type of reference that can be moved from — an rvalue reference. We’ll cover more about how std::move works, but for now it suffices to remember that an rvalue reference is something like Move&&.

By providing a value of this type (Move&&), we select the appropriate overload of the move constructor and/or move assignment operator, Move(Move&&) and operator=(Move&&) respectively.

1.3.2 Moving a std::vector

Now, let’s make a std::vector of our new type, and see what we get. Note that we first copy-initialize copy, but then move-initialize move, by using std::move. Before we talk more about it, let’s just run the code:

    std::vector xs{Move{},  //
                   Move{},
                   Move{}};

    std::cout << "<4\n"
              << "copying:\n";
    auto copy = xs;

    std::cout << "\n";
    std::cout << "moving:\n";
    auto move = std::move(xs);

    std::cout << "\n";
$ ./main.out | sed -n '1,/<4/d;/4>/q;p'
copying:
copy constructor
copy constructor
copy constructor
moving:
Snippet 9: Trying out our Move struct with a std::vector

Wait, there’s no output for the “moving” part? That’s right, none of the move-related functions are called for our type because std::vector moves simply by re-pointing the buffer.

To see how this works, let’s revisit the layout of a std::vector:

The layout of a std::vector

Note that we assign some arbitrary values to size and capacity just for illustration.

As we can see, the state of this vector consists of 3 things: the number of elements (size), the capacity of the buffer (capacity), and the pointer to the buffer itself (buffer). To move a vector, we simply need to transfer these 3 values to another vector, and the ownership of the memory would also be semantically transferred.

That would look something like this:

Moving the vector from A to B

Since we only need to transfer 3 fields (which, on the hardware level, are just 3 numbers), this is very cheap. Contrast this with a copy of the vector, which needs to create a separate heap allocation, plus copy each of the members:

Copying the vector from A to B

Clearly, the copy is a lot more expensive. The same idea applies for our SimpleVector, just without the capacity field.

1.3.3 Moving into functions

Since constructors and assignment operators are just functions, we should expect to be able to move values into functions as well, and indeed we can. If a function takes a parameter by value, then we can use std::move at the call site to move an object into the function, avoiding a copy:

void take_value(Move m) {
  (void) m;
  std::cout << "took\n";
}
    Move m{};

    std::cout << "copy:\n";
    take_value(m);

    std::cout << "\n"
              << "move:\n";
    take_value(std::move(m));
$ ./main.out | sed -n '1,/<5/d;/5>/q;p'
copy:
copy constructor
took
move:
move constructor
took
Snippet 10: Moving vs. copying into a function

As we can see, calling the function without using std::move results in a copy, while moving results in no copy (and we call the move constructor instead). Essentially, the function’s parameter (here, m) needs to be constructed in the function; we can either copy construct or move construct, depending on the caller’s argument.

1.4 What happens to the moved-from value?

One thing that you might be asking is, what happens to the object that we move out of? That is completely up to the implementation of the class. However, for almost every single type that you implement, you should ensure that a moved-from object is still valid.

In fact, the standard guarantees that all standard library objects will be left in a valid but unspecified state after they are moved from.

As we’ve seen above for example, a moved-from std::vector essentially becomes an empty vector again. You can continue to push_back, assign, etc.

1.4.1 A note on ownership

It should be noted that since we have “stolen” the ownership of the resource from the object we moved from, there’s still now only one owner of the resource.

This means that we won’t get into situations of double-free — as long as you implement your move functions correctly.

2 Rvalue references

Before we can talk about how to implement move semantics, we should discuss rvalue references. As we’ve seen previously, C++ has references — the ones that we’ve seen so far have all been lvalue references, like int&, ElemTy&, etc.

We now introduce rvalue references, which look like this: int&&, ElemTy&&, etc. The idea is that, just as lvalue references bind to lvalues, rvalue references bind to rvalues. What are these values? Well, we’ll discuss value categories a little further below, but for now a good intuition is that rvalue references usually bind to temporaries, and lvalue references usually bind to objects.

For example, in the following code:

int x = 0;
int& lref1 = x;         // works
int&& rref1 = x;        // doesn't work

Foo&& rref2 = Foo{};    // works
Foo& lref2 = Foo{};     // doesn't work
Snippet 11: Lvalue references can only bind to lvalues, and rvalue references can only bind to rvalues

As we see, lvalue references cannot bind to temporaries, and rvalue references cannot bind to objects (like x).

Just for illustration, note that the same mechanics also apply to function calls:

struct Foozle {
  int x;
};
void take_rvalue_ref(Foozle&& x) {
  std::cout << "x = "  //
            << x.x << "\n";
}
    // `Foozle{}` is a temporary here
    take_rvalue_ref(Foozle{100});

    // note: Foozle&& cannot bind
    // to lvalue-references!
    auto f = Foozle{2};
    // compile error:
    // take_rvalue_ref(f);
    (void) f;
Snippet 12: Further illustration of the allowed usage of rvalue references
How does an rvalue reference to a temporary work?

Rvalue references being able to bind to temporaries kind of begs the question — do we get dangling references? The answer is, as usual, it depends.

When you do something like this:

int&& x = 100;

What actually happens is that, if an rvalue reference is initialized with a temporary, then we get temporary materialisation, and the resulting temporary acts like a normal object, ie. it lives until the end of the scope.

So, the same rules apply about dangling references — if the temporary goes out of scope and you use the reference, it’s undefined behaviour.

2.1 Obtaining an rvalue reference

In order to get an rvalue reference to an object, we can use std::move as seen above. If we look at the implementation of std::move:

template <class _Tp>
typename remove_reference<_Tp>::type&& move(_Tp&& __t) {
  using _Up = remove_reference<_Tp>::type;
  return static_cast<_Up&&>(__t);
}
Snippet 13: The implementation of std::move in libc++

It looks complicated, but if we ignore the template magic and focus on the body, we see that it really is just a static cast to a T&&. In fact, we can do this ourselves:

    auto f = Foozle{100};
    take_rvalue_ref(static_cast<Foozle&&>(f));
Snippet 14: If we don’t need to make a generic function, we can just move with static_cast

We’ll cover the template workings of std::move below, but for now, just know that it works and you can use it :D.

3 Implementing a NullableOwnPtr

Full godbolt link

Full code snippet for this section
// ownptr/main.cpp

#include <cassert>
#include <iostream>

template <typename T>
struct NullableOwnPtr {
private:
  T* m_ptr;

public:
  NullableOwnPtr() {
    m_ptr = new T{};
  }
  ~NullableOwnPtr() {
    delete m_ptr;
  }

  // copy constructor
  NullableOwnPtr(const NullableOwnPtr& other) {
    if (other.m_ptr) {
      m_ptr = new T{*other.m_ptr};
    } else {
      m_ptr = nullptr;
    }
  }

  // copy-assignment operator
  NullableOwnPtr& operator=(const NullableOwnPtr& other) {
    auto copy = NullableOwnPtr{other};
    this->swap(copy);
    return *this;
  }

  NullableOwnPtr(NullableOwnPtr&& other) : m_ptr(nullptr) {
    std::swap(m_ptr, other.m_ptr);
  }

  NullableOwnPtr& operator=(NullableOwnPtr&& other) {
    auto moved = NullableOwnPtr{std::move(other)};
    this->swap(moved);
    return *this;
  }

  bool has_value() const {
    return m_ptr != nullptr;
  }

  // convenience operator overloads
  T* operator->() {
    return m_ptr;
  }
  const T* operator->() const {
    return m_ptr;
  }

  T& operator*() {
    assert(m_ptr != nullptr);
    return *m_ptr;
  }

  const T& operator*() const {
    assert(m_ptr != nullptr);
    return *m_ptr;
  }

  // swap member function
  void swap(NullableOwnPtr& other) {
    using std::swap;
    swap(other.m_ptr, m_ptr);
  }

  // non-member swap function
  friend void swap(NullableOwnPtr& a, NullableOwnPtr& b) {  //
    a.swap(b);
  }

  template <typename... Args>
  NullableOwnPtr(Args&&... args) {
    m_ptr = new T{std::forward<Args>(args)...};
  }

};

int main() {
  struct Foo {
    // default constructor is deleted
    Foo(int&& x, int& y) : x(x), y(y) {}

    int x;
    int y;
  };

  int y = 20;
  auto foo = NullableOwnPtr<Foo>(10, y);
  std::cout << "foo = ("       //
            << foo->x << ", "  //
            << foo->y << ")\n";

  // this is a compile error
  // auto bar = NullableOwnPtr<Foo>();
}

To illustrate how to implement move semantics, we start with a simple class that’s similar to the OwnedInt we saw in previous lectures. The key differences here are that it will be nullable (ie. might own nothing), and be movable.

Let’s start by implementing the basic NullableOwnPtr that just supports copying:

template <typename T>
struct NullableOwnPtr {
private:
  T* m_ptr;

public:
  NullableOwnPtr() {
    m_ptr = new T{};
  }
  ~NullableOwnPtr() {
    delete m_ptr;
  }

  // copy constructor
  NullableOwnPtr(const NullableOwnPtr& other) {
    if (other.m_ptr) {
      m_ptr = new T{*other.m_ptr};
    } else {
      m_ptr = nullptr;
    }
  }

  // copy-assignment operator
  NullableOwnPtr& operator=(const NullableOwnPtr& other) {
    auto copy = NullableOwnPtr{other};
    this->swap(copy);
    return *this;
  }
  bool has_value() const {
    return m_ptr != nullptr;
  }

  // convenience operator overloads
  T* operator->() {
    return m_ptr;
  }
  const T* operator->() const {
    return m_ptr;
  }

  T& operator*() {
    assert(m_ptr != nullptr);
    return *m_ptr;
  }

  const T& operator*() const {
    assert(m_ptr != nullptr);
    return *m_ptr;
  }

  // swap member function
  void swap(NullableOwnPtr& other) {
    using std::swap;
    swap(other.m_ptr, m_ptr);
  }

  // non-member swap function
  friend void swap(NullableOwnPtr& a, NullableOwnPtr& b) {  //
    a.swap(b);
  }
};
Snippet 15: The basic implementation of NullableOwnPtr

As we’ve seen these functions a couple of times before, we’re not going to spend too much time going through how they work — in fact, they’re actually simplified versions of those we saw in SimpleVector before.

Note that we’ve taken some care to check that m_ptr is not nullptr — the reason for this should already be evident.

3.1 Implementing move semantics

In order to implement move semantics for our NullableOwnPtr, we just need to overload the move assignment operator and the move constructor. As we mentioned above, a moved-from instance of our NullableOwnPtr should be left in a valid state, and in this case our state is that we own nothing — and the internal pointer is null.

The arguments for the move-constructor and move-assignment operator are an rvalue ref; when we give it a temporary or the result of a std::move, this matches the argument type, and calls the correct overload (our move functions).

3.1.1 Move constructor

Let’s start with the move constructor first:

  NullableOwnPtr(NullableOwnPtr&& other) : m_ptr(nullptr) {
    std::swap(m_ptr, other.m_ptr);
  }
Snippet 16: NullableOwnPtr’s move constructor

That’s really it. We first set our own pointer to nullptr, and then swap it with the other object. This means that we get their pointer, and they get ours — which is now null. Effectively, we’ve “stolen” the ownership of the pointer from the other instance.

As for the signature, we take in an rvalue reference (ie. NullableOwnPtr&&), which is typically either a temporary, or the result of a call to std::move.

3.1.2 Move-assignment operator

We can now implement the move-assignment operator (which has a similar signature):

  NullableOwnPtr& operator=(NullableOwnPtr&& other) {
    auto moved = NullableOwnPtr{std::move(other)};
    this->swap(moved);
    return *this;
  }
Snippet 17: NullableOwnPtr’s move assignment operator

Note that here, we’ve chosen to implement it with the move-and-swap idiom (which is the lesser-known cousin of the copy-and-swap idiom previously discussed). We’re essentially implementing the move-assignment operator in terms of the move constructor, and this lets us sidestep the problem of self-copy (or, in this case, self-move) that we’ve seen in the past.

3.2 Rule of 5

We’ve seen the rule of 3 before, and in fact our starting implementation of NullableOwnPtr also followed the rule of 3. As you might guess, the rule of 5 is simply an extension of the rule of 3.

In fact, if your class follows the rule of 3 (ie. has user-defined copy assignment and copy constructor), then it should almost always also implement the move versions of those:

Because the presence of a user-defined (or = default or = delete declared) destructor, copy-constructor, or copy-assignment operator prevents implicit definition of the move constructor and the move assignment operator, any class for which move semantics are desirable, has to declare all five special member functions.

cppreference

4 Making SimpleVector movable

Full godbolt link

Full code snippet for this section
// simplevector.cpp

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <initializer_list>
#include <iostream>

template <typename ElemTy>
struct SimpleVector {
private:
  ElemTy* m_buffer;
  size_t m_size;

public:
  SimpleVector() : m_buffer(nullptr), m_size(0) {}
  SimpleVector(const SimpleVector& other) : m_size(other.m_size) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < m_size; i++) {
      m_buffer[i] = other.m_buffer[i];
    }
  }

  SimpleVector(std::initializer_list<ElemTy> init_list)
      : m_size(init_list.size()) {
    m_buffer = new ElemTy[m_size];
    for (size_t i = 0; i < init_list.size(); i++) {
      m_buffer[i] = init_list.begin()[i];
    }
  }
  ~SimpleVector() {
    delete[] m_buffer;
  }

  SimpleVector& operator=(const SimpleVector& other) {
    SimpleVector copy{other};
    this->swap(copy);

    return *this;
  }

  SimpleVector(SimpleVector&& other) : m_buffer(nullptr), m_size(0) {
    std::swap(m_buffer, other.m_buffer);
    std::swap(m_size, other.m_size);
  }

  SimpleVector& operator=(SimpleVector&& other) {
    auto moved = SimpleVector{std::move(other)};
    this->swap(moved);
    return *this;
  }

#if 0
  SimpleVector& operator=(SimpleVector other) {
    auto moved = SimpleVector{std::move(other)};
    this->swap(moved);
    return *this;
  }
#endif

  void push_back(ElemTy element) {
    auto new_buffer = make_new_buffer(m_size + 1);
    new_buffer[m_size] = std::move(element);
    m_size += 1;

    delete[] m_buffer;
    m_buffer = new_buffer;
  }

  ElemTy pop_back() {
    assert(m_size > 0);
    auto new_buffer = make_new_buffer(m_size - 1);

    ElemTy last_elem = std::move(m_buffer[m_size - 1]);
    m_size -= 1;

    delete[] m_buffer;
    m_buffer = new_buffer;

    return last_elem;
  }

  template <typename... Args>
  void emplace_back(Args&&... args) {
    auto new_buffer = make_new_buffer(m_size + 1);
    new_buffer[m_size] = ElemTy(std::forward<Args>(args)...);

    m_size += 1;
    delete[] m_buffer;
    m_buffer = new_buffer;
  }

  size_t size() const {
    return m_size;
  }
  ElemTy& operator[](size_t idx) {
    return m_buffer[idx];
  }
  const ElemTy& operator[](size_t idx) const {
    return m_buffer[idx];
  }
  void swap(SimpleVector& other) {
    std::swap(m_buffer, other.m_buffer);
    std::swap(m_size, other.m_size);
  }
  friend void swap(SimpleVector& a, SimpleVector& b) {
    a.swap(b);
  }
  friend std::ostream& operator<<(std::ostream& os, SimpleVector& vec) {
    os << "[";
    for (size_t i = 0; i < vec.size(); i++) {
      if (i > 0) {
        os << ", ";
      }
      os << vec[i];
    }
    os << "]";
    return os;
  }

private:
  ElemTy* make_new_buffer(size_t new_size) {
    auto new_buffer = new ElemTy[new_size];
    for (size_t i = 0; i < std::min(new_size, m_size); i++) {
      // instead of copy-assigning from the old to the new,
      // do move-assignment to avoid copies!
      new_buffer[i] = std::move(m_buffer[i]);
    }

    return new_buffer;
  }
};

int main() {
  struct Foo {
    Foo() {}
    Foo(int x, int y) : x(x), y(y) {}

    int x;
    int y;
  };

  SimpleVector<Foo> foos{};
  foos.emplace_back(69, 420);

  std::cout << "foo = ("          //
            << foos[0].x << ", "  //
            << foos[0].y << ")\n";
}

Now that we’ve seen how to make basic movable classes, let’s do the same for SimpleVector.

4.1 Move constructor and move-assignment:

This is the simple part, since it’s almost exactly the same as NullableOwnPtr:

  SimpleVector(SimpleVector&& other) : m_buffer(nullptr), m_size(0) {
    std::swap(m_buffer, other.m_buffer);
    std::swap(m_size, other.m_size);
  }
Snippet 18: The move constructor for SimpleVector
  SimpleVector& operator=(SimpleVector&& other) {
    auto moved = SimpleVector{std::move(other)};
    this->swap(moved);
    return *this;
  }
Snippet 19: The move assignment operator for SimpleVector

There’s nothing much to say. Again note that we leave our vector in a valid state after it’s been moved from; m_buffer is null and m_size is zero, which is exactly the same as a default-constructed empty vector.

4.2 Efficient push_back and pop_back

To implement these efficiently, we should first uphold some “good software engineering practices” and factor out the vector resizing into a separate function:

  ElemTy* make_new_buffer(size_t new_size) {
    auto new_buffer = new ElemTy[new_size];
    for (size_t i = 0; i < std::min(new_size, m_size); i++) {
      // instead of copy-assigning from the old to the new,
      // do move-assignment to avoid copies!
      new_buffer[i] = std::move(m_buffer[i]);
    }

    return new_buffer;
  }
Snippet 20: A function that makes a new buffer and moves over the elements from the old one

As noted in the comment, we are using std::move to efficiently move the elements from the old buffer into the new one, so that we avoid copies whenever possible.

4.2.1 push_back

Next, we can implement push_back using this function:

  void push_back(ElemTy element) {
    auto new_buffer = make_new_buffer(m_size + 1);
    new_buffer[m_size] = std::move(element);
    m_size += 1;

    delete[] m_buffer;
    m_buffer = new_buffer;
  }
Snippet 21: The better implementation of push_back

One key point to note is that we are now taking the new element by value. This might seem inefficient, but it actually isn’t. There are three cases:

  1. The caller gives us an object (that would normally be bound to an lvalue reference). We would have to make a copy into the new buffer anyway (since we cannot move from an lvalue reference), so we don’t lose anything here.
  2. The caller gives us a prvalue, eg. push_back(Foo{1}). The parameter is constructed in-place due to copy elision, which we then move-assign into the new buffer, with no copies.
  3. The caller gives us an xvalue, eg. push_back(std::move(x)). We move-construct the parameter, which is then move-assigned into the buffer, again without copying.

We mentioned prvalues and xvalues here; we’ll talk about these value categories properly below, but for now you can associate them with the examples above.

But why dooes std::vector::push_back have two separate overloads?

The reason for this is simply for optimisation purposes. Notice that, for example, std::vector::push_back has two overloads:

void push_back(const T& x);
void push_back(T&& x);
Snippet 22: The two overloads of push_back

If we have caller code like this:

Foo x { ... };
std::vector<Foo> xs { ... };
xs.push_back(std::move(x));
Snippet 23: std::move-ing into a vector via push_back

Then we would first call the move constructor to construct an instance of Foo in the push_back function, and then that instance is then move-assigned into the buffer. This means we perform two moves, which might not be desirable.

To get around this (and especially since the standard containers are designed to work with as wide a range of user types as possible), there are two overloads. For the same caller code, we call the second (rvalue-ref) overload (since we are taking a reference, no move constructor or assignment happens) and we only move-assign once, into the final buffer.

4.2.2 pop_back

This function is a lot simpler, now that we’ve written the helper.

  ElemTy pop_back() {
    assert(m_size > 0);
    auto new_buffer = make_new_buffer(m_size - 1);

    ElemTy last_elem = std::move(m_buffer[m_size - 1]);
    m_size -= 1;

    delete[] m_buffer;
    m_buffer = new_buffer;

    return last_elem;
  }
Snippet 24: The better implementation of pop_back

Note here that we use std::move to move the last element out of the old buffer.

4.2.3 By-value assignment operator

Let’s jump back a bit to the assignment operator. As we saw while discussing copy assignment in an earlier lecture, we can also implement the copy-assignment operator with a by-value argument. This assignment operator actually also works for move assignment as well!

  SimpleVector& operator=(SimpleVector other) {
    auto moved = SimpleVector{std::move(other)};
    this->swap(moved);
    return *this;
  }
Snippet 25: The by-value assignment operator for SimpleVector

The same principle that we saw for push_back applies here — either we had to make a copy anyway (making this act like a copy-assignment operator), or we get a temporary or a std::move-ed thing, and we don’t need to make a copy (making this act like move-assignment).

Some people call this the “rule of 4.5”, where we implement the destructor, copy constructor, move constructor, by-value assignment operator, and swap (swap is the 0.5).

4.3 SimpleVector summary

And that’s it — we’ve implemented the rule of 5 for SimpleVector, and reduced the number of unnecessary element copies by taking advantage of move semantics.

5 Value categories

Just as each iterator type can be assigned an iterator category, every expression can be assigned a value category.

Very important: value categories are given to expressions and NOT types!

Why is it not called expression category? Don’t ask me, ask Christopher Strachey et al..

5.1 Some historical intuition

In C, expressions can have one of 2 value categories:

The original intuition for this was to define the semantics of the = assignment operator.

For example, if we had the expression s.m where s is some struct containing a member named m, then it can be used on either the right-hand-side or the left-hand-side.

struct S {
  int m;
};
struct S s;
s.m = s.m;  // valid C
Snippet 26: s.m is an lvalue, allowed on both sides of =

We thus say that s.m is an lvalue, since it can be on the left-hand-side.

On the other hand, an expression like 0 can only be on the right-hand-side.

struct S {
  int m;
};
struct S s;
0 = 0;    // invalid C
0 = s.m;  // invalid C
s.m = 0;  // valid C
Snippet 27: rvalues are only allowed on the right hand side of =

Thus, we say that 0 is an rvalue, since it must be on the right-hand-side.

Think about how you would go about determining that s.m is an lvalue. Is it the .m? It is not, because the following is not valid.

struct S get_s() {
  return (struct S){0};
}
// .m of a temporary is still an rvalue
get_s().m = 0;  // invalid C
Snippet 28: Member of temporary is an rvalue

So how would you actually figure out whether an expression is assignable to? The way to do it is to analyze all the subexpressions and their types and build upwards until we get our answer.

In this case, we have a function call get_s() which is an rvalue. Accessing the member of an rvalue gives you an rvalue, so get_s().m is still an rvalue.

In the previous case, s was an lvalue, and accessing the member of an lvalue gives you an lvalue, so s.m is an lvalue.

The full rules for C value categories can be found here, and the rule we just used is:

Aside from assigning, another important operation is taking the address of the object referred to by an expression. Since this requires that an object exists, the criteria for being able to use the address of operator & is the same as the criteria for being able to assign to it (ignoring constness).

struct S s;
&s;        // ok
&get_s();  // not ok
Snippet 29: Taking address only allowed on lvalues

Finally, we have the idea of whether an expression refers to some object, or is a value.

int i = 0;  // There is some object on the stack, and i refers to it
i;          // This object would give you the value `0` if you read it
0;          // On the other hand, this is just a value!
Snippet 30: lvalues refer to objects, rvalues are values

In C, lvalues are objects and rvalues are values, simple as that.

lvalue rvalue
Can take address
Can assign
Refers to object
Is a value
Snippet 31: Summary of C value categories
(might be inaccurate since I didn’t research C that deeply)

Note that referring to an (existing) object and being a value are mutually exclusive. If an expression is a value, then it is not an object yet. But once it is an object, while its value can still be read from, it is now special as it has a memory location.

Thus, sometimes we will see the following distinction being made instead:

lvalue rvalue
Can take address
Can assign
Has identity
Snippet 32: Summary of C value categories
(using alternative naming)

where “has identity” refers to the property of having a memory location, and being an object.

While making these distinctions sounds redundant, they turn out to be different for C++…

5.2 Overview of C++ value categories

Since C++ wasn’t complicated enough, every expression can have one of 3 value categories.

First of all, prvalues more or less correspond to C rvalues, and lvalues more or less correspond to C lvalues.

xvalues are kind of in between, and share some properties with both lvalues and prvalues. One way to think about xvalues is that they are like lvalues that have been stripped the right to be assigned to, but given the right to be moved from.

This fits with the intuition since while x is an lvalue, std::move(x) is an xvalue.

5.3 How to intuitively identify each value category

You likely already have an intuition for how to identify whether expressions in C are lvalues or rvalues, sort of by “common sense” of whether they can go on the left hand side of an assignment.

In C++, this intuition mostly works as well. We’ll start with something that’s very straightforward, which is by looking at the return type of a function call.

T x;

T& returns_ref() { return x; }
T&& returns_refref() { return std::move(x); }
T returns_copy() { return x; }

returns_ref() = T{};     // Allowed, returns_ref() is an lvalue
returns_refref() = T{};  // NOT allowed, returns_refref() is an xvalue
returns_copy() = T{};    // NOT allowed, returns_copy() is a prvalue

a = returns_ref();       // Calls copy assign, copies x to a
b = returns_refref();    // Calls move assign, moves x to a
c = returns_copy();      // Calls move assign, moves a copy of x to a

Firstly, notice that lvalues and xvalues are references to existing objects, and not values.

Secondly, notice that xvalues and prvalues will trigger the move constructor. In fancy terms, we say that these expressions can be bound to rvalue references. On the other hand, lvalue references don’t trigger the move constructor.

Thirdly, notice that just like in C, we may only assign to lvalues, but not xvalues and prvalues. This makes some sense, but possibly kind of weird when it comes to the xvalue case, as returns_refref() refers to an object. However, with the intuition that objects that can be moved from shouldn’t be assignable, we can accept this for what it is.


With these three fundamental properties, you can get pretty good at guessing whether an expression is an lvalue, xvalue, or prvalue.

See if you can work through the following list and see the logic:

5.4 Properties of each value category

We’ll first show the full table of what we can do with each type of expression, and slowly explain what each row means in later sections.

lvalue xvalue prvalue
Can take address
Can assign
Refers to object
Is a value
Can be move-assigned from
Can be move-constructed from*

*We will talk about this at the end of this section. For now, when we say “moved-from”, we mean move-assigned from.

Snippet 33: Summary of C++ value categories

But as we can see from the table, xvalues sit somewhere in between.

Like lvalues, xvalues refer to an object, rather than a value. However, you’re not allowed to take the address of this object, even though objects have addresses!

And just like prvalues, xvalues cannot be assigned to, unless you’re a pedant.

While every expression is one of these three value categories, it is sometimes useful if we only care about one particular property.

In the context of move semantics, we care about whether an expression can be moved-from or not, or whether it will invoke the move constructor / assignment operator (i.e. can it be bound to an rvalue reference). Both xvalues and prvalues fall into this category, so we can say that these are “rvalues”.

Similarly, if we simply care about expressions that refer to objects, but don’t care about any other property, then either lvalues or xvalues will fulfil this criteria, so we say that these are “glvalues” (generalised lvalue).

Here’s the Venn diagram showing what the two combined categories contain.

Combined value categories Venn diagram (note that no expressions live in the the shaded / white regions)

If we include the combined categories in the table, here’s what we get. Try to ignore the ❓, just note that the main property of a glvalue is that it refers to an object (and thus is not a value), while the main property of an rvalue is that it can be moved from (i.e. it can be bound to a rvalue reference).

lvalue glvalue xvalue rvalue prvalue
Can take address
Can assign
Refers to object
Is a value
Can be moved from
Snippet 34: Summary of C++ value categories (and combined categories)

5.4.1 (glvalues vs prvalues) “refers to an object” vs “is a value”

When we say that glvalues refer to an object, we should contrast this from prvalues, which are values and do NOT refer to existing objects.

int x;

x;             // lvalue, refers to the object x above
std::move(x);  // xvalue, refers to the object x above
0;             // prvalue, is value and not object

SimpleVector x;

SimpleVector y = x;             // lvalue, refers to the object x above
                                // This invokes the ctor:
                                // SimpleVector(const SimpleVector&)

SimpleVector z = std::move(x);  // xvalue, refers to the object x above
                                // This invokes the ctor:
                                // SimpleVector(SimpleVector&&)

// the lifetime of x actually hasn't ended here!

Note that it’s important that xvalues refer to objects and are NOT values!

In order for the destructor for x to not double-free, the move constructor NEEDS to be able to mutate x so that it doesn’t continue to own the buffer containing the elements.

In order to do that, std::move(x) can’t be a value, it needs to refer to an object (namely x) so that the move constructor can mutate it.

We also saw earlier another big way these two categories differ, which is in the return type of a function:

int& f();   // lvalue, is glvalue, refers to existing object
int&& f();  // xvalue, is glvalue, refers to existing object
int f();    // prvalue, is new and freshly created value, not object

Note that along this axis, we can clearly see the resemblance to C lvalues and C rvalues. In C, lvalues always refer to existing objects, and rvalues always refer to freshly created values.


However, where C++ begins to diverge from C is when we look at xvalues, which refer to existing objects, but are slightly different from C lvalues since they cannot be assigned to, but can be moved from.

5.4.2 (lvalues vs rvalues) “can assign / take address” vs “cannot …”

Next up is the fundamental property of an lvalue, which is that it’s assignable (if we ignore pedantry).

In order to assign to some object, that object must already exist somewhere in memory, and the assignment will simply mutate that object in place.

int x;  // This object already exists on the stack

x = 1;  // This assignment works because x already exists...

std::move(x) = 1;  // But what about this?

However, we want to prevent assigning to rvalues. So in order to make std::move(x) = 1 invalid, it is not enough to say that assignment only requires that the object already exists somewhere in memory, because that is also true for std::move(x).

Thus, a fundamental property of an lvalue is NOT just that it refers to an object, but rather that it is assignable, which is subtly different. On the flip side, rvalues cannot be assigned to.

We might ask why taking the address of an lvalue is such a big deal. The reason is because it effectively allows us to assign to it:

int x;

int* px = &x;  // We can take the address of an lvalue...
*px = 1;       // and mutate the object

int y;

int* py = &std::move(y);  // IF this was allowed, then...
*py = 1;  // we'd be able to mutate y through std::move(y)!

5.4.3 (rvalues vs lvalues) “can be moved from” vs “cannot be moved from”

This last property is the whole point why C++ invented the xvalue value category.

Here, the only difference is whether it will trigger the move constructor or the copy constructor when both exist.

Specifically, this determines whether an expression can be bound to an rvalue reference T&&, or if it can only be bound to a const lvalue reference (const T&).

struct T {
  T(const T&) {
    // copy ctor stuff
  }
  T(T&&) {
    // move ctor stuff
  }

  // insert rule of 5 here
};

T x, y;

y = x;             // triggers copy-assign because this is lvalue
y = std::move(x);  // triggers move-assign because this is xvalue
y = T{};           // triggers move-assign because this is prvalue

const T& a = x;             // Ok, you can always be bound to const T&
const T& b = std::move(x);  // Ok, you can always be bound to const T&
const T& c = T{};           // Ok, you can always be bound to const T&

T&& d = x;             // Not ok, lvalues can't be bound to T&&
T&& e = std::move(x);  // Ok, rvalues can be bound to T&&
T&& f = T{};           // Ok, rvalues can be bound to T&&

5.5 Value category of an rvalue reference

Now, armed with this knowledge in mind, what is the value category of x.m in the following context?

struct S {
  T m;

  // What's the value category of x.m?
  //            |
  //            |
  //           v~v
  S(S&& x) : m(x.m) {}
  //           ^
  //           |
  // Also, what's the value category of just x?
};

The answer is that both x and x.m are lvalues!

This is very important! Value categories are given to expressions and not types!

Consider what the move constructor of NullableOwnPtr had to do:

  NullableOwnPtr(NullableOwnPtr&& other) : m_ptr(nullptr) {
    std::swap(m_ptr, other.m_ptr);
  }
Snippet 35: NullableOwnPtr’s move constructor, reproduced here

Here, we passed other.m_ptr into std::swap, which has the type signature:

template< class T >
constexpr void swap( T& a, T& b ) noexcept(/* see below */);
Snippet 36: Type signature of std::swap

which means that other.m_ptr is actually an lvalue.

In fact, we could have also written the constructor like so:

NullableOwnPtr(NullableOwnPtr&& other) : m_ptr(other.m_ptr) {
  other.m_ptr = nullptr;
}
Snippet 37: Alternative way of writing NullableOwnPtr’s move constructor

where it’s now clear that other and other.m_ptr were in fact lvalues.

Keep in mind that while other’s type is an rvalue reference, other refers to an object (because it is a reference) and that object is assignable to. Thus, it is an lvalue.

This is weird. But well, C++ is weird, and we can see why C++ was designed this way. If rvalue references could not be assigned to, then move constructors wouldn’t be able to modify the moved-from object, and thus it would be impossible to implement memory ownership with move semantics.

5.6 Special case for const lvalue reference binding

So far, we’ve been showing how lvalues and rvalues are allowed to be bound to lvalue references and rvalue references respectively.

T x;
T& lref1 = T{0};   // not ok
T& lref2 = x;      // ok
T&& rref1 = T{0};  // ok
T&& rref2 = x;     // not ok

However, this would make it so that if a class defines a copy assignment operator but NOT a move assignment operator, then it only has operator= overloads that take in an lvalue reference! Wouldn’t this mean that the following code cannot compile?

Godbolt link

struct T {
  int i;
  T(int i) : i(i) {}
  T(const T&) = default;
  T& operator=(const T&) = default;

  // By NOT following rule of 5 but only using rule of 3,
  // the move constructor and move assignment operator are not defined.
  // T(T&&);             // doesn't exist
  // T& operator=(T&&);  // doesn't exist
};

int main() {
  T x{0};
  x = T{1};  // RHS is prvalue, so it's an rvalue,
             // rvalues can only be bound to rvalue references, right?
}

But it compiles!

The reason for that while rvalues cannot be bound to lvalue references in general, they can be bound to const lvalue references.

This is a shim that C++ added for the sole purpose of making copy constructors / assignments work.

This means that if you defined the following:

Godbolt link

struct T {
  int i;
  T(int i) : i(i) {}

  // Ignore C++ coding conventions by using non-const lvalue ref
  T(T& other) : i(other.i) {}
  T& operator=(T&) {
    i = other.i;
  }
};

int main() {
  T x{0};
  x = T{1};  // RHS is prvalue, so it's an rvalue,
             // rvalues can only be bound to rvalue references
             // OR const lvalue references,
             // BUT NOT non-const lvalue references
}

It doesn’t compile, as expected.

5.7 Copy elision and guaranteed copy elision

We end off with one more footnote about guaranteed copy elision, which increases the importance of in-place construction.

5.7.1 Copy elision in action

Usually, C++ does not allow compilers to change the (observable) behaviour of (valid) C++ programs during optimisation or any other stage of the compilation process.

However, there are two exceptions, and copy elision is one of them.

Here’s an example of copy elision in action, where we explicitly disable copy elision in one compilation, and compile with optimisations in the other.

Godbolt link

// nrvo.cpp

#include <iostream>

struct Trace {
  Trace() {
    std::cout << "Trace()" << std::endl;
  }
  ~Trace() {
    std::cout << "~Trace()" << std::endl;
  }

  Trace(const Trace&) {
    std::cout << "Trace(const Trace&)" << std::endl;
  }
  Trace& operator=(const Trace&) {
    std::cout << "Trace& operator=(const Trace&)" << std::endl;
    return *this;
  }
};

Trace f() {
  Trace t;

  return t;
}

void g() {
  Trace t = f();
}

int main() {
  g();
}
$ make -B nrvo.NO-ELIDE.out
clang++ -std=c++20 -fno-elide-constructors -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o nrvo.NO-ELIDE.o nrvo.cpp
clang++ -std=c++20 -fno-elide-constructors -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable    nrvo.NO-ELIDE.o   -o nrvo.NO-ELIDE.out
$ ./nrvo.NO-ELIDE.out
Trace()
Trace(const Trace&)
~Trace()
~Trace()
$ make -B nrvo.O3.out
clang++ -std=c++20 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o nrvo.O3.o nrvo.cpp
clang++ -std=c++20 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable    nrvo.O3.o   -o nrvo.O3.out
$ ./nrvo.O3.out
Trace()
~Trace()
Snippet 38: Example of copy elision (specifically “named return value optimisation”, or NRVO)

We can see that when explicitly disabling copy elision, the program behaves “as expected”. But even though our program is perfectly valid C++ with no undefined behaviour anywhere, compiling with optimisations causes our program’s behaviour to change.

This is because NRVO is one of the optional copy elision patterns the compiler is allowed to apply to code to change its behaviour, usually saving lots of unnecessary copies.

However, a slightly spookier side of copy elision is known as guaranteed copy elision.

5.7.2 Guaranteed copy elision in action

Notice that when we compiled with optimisations, the copy constructor was never called. Does this mean that making the class non-copyable will not have any effect?

Godbolt link

// nrvo-noncopyable.cpp

#include <iostream>

struct Trace {
  Trace() {
    std::cout << "Trace()" << std::endl;
  }
  ~Trace() {
    std::cout << "~Trace()" << std::endl;
  }

  Trace(const Trace&) = delete;
  Trace& operator=(const Trace&) = delete;
};

Trace f() {
  Trace t;

  return t;
}

void g() {
  Trace t = f();
}

int main() {
  g();
}
$ make -B nrvo-noncopyable.O3.out
clang++ -std=c++20 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o nrvo-noncopyable.O3.o nrvo-noncopyable.cpp
nrvo-noncopyable.cpp:20:10: error: call to deleted constructor of 'Trace'
  return t;
         ^
nrvo-noncopyable.cpp:13:3: note: 'Trace' has been explicitly marked deleted here
  Trace(const Trace&) = delete;
  ^
1 error generated.
make: *** [Makefile:54: nrvo-noncopyable.O3.o] Error 1
Snippet 39: Compiler will not compile our code even with optimisations!

It didn’t work. That’s because even though the compiler is allowed to make the optimisation, it does not apply the optimisation for the purposes of checking if the program is valid. Here, the copy does exist in the program, it’s simply that the compiler is allowed not to elide it.

This changes when we look at a different form of copy elision, called “return value optimisation” or RVO.

In C++14, RVO was just like NRVO, and was one of the few allowed optimisations, but at that point, there was no notion of a guaranteed copy elision.

So if we compile with C++14, we will see that just like NRVO, the compiler will complain if we don’t have a copy constructor, even if the copy constructor is never called when optimisations are turned on.

Godbolt link

// rvo.cpp

#include <iostream>

struct Trace {
  Trace() {
    std::cout << "Trace()" << std::endl;
  }
  ~Trace() {
    std::cout << "~Trace()" << std::endl;
  }

  Trace(const Trace&) {
    std::cout << "Trace(const Trace&)" << std::endl;
  }
  Trace& operator=(const Trace&) {
    std::cout << "Trace& operator=(const Trace&)" << std::endl;
    return *this;
  }

  Trace(Trace&&) {
    std::cout << "Trace(Trace&&)" << std::endl;
  }
  Trace& operator=(Trace&&) {
    std::cout << "Trace& operator=(Trace&&)" << std::endl;
    return *this;
  }
};

Trace f() {
  return Trace{};
}

void g() {
  Trace t = f();
}

int main() {
  g();
  Trace t;
  t = Trace{};
}
$ make -B rvo.cxx14.O3.out
clang++ -std=c++14 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o rvo.cxx14.O3.o rvo.cpp
clang++ -std=c++14 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable    rvo.cxx14.O3.o   -o rvo.cxx14.O3.out
$ ./rvo.cxx14.O3.out
Trace()
~Trace()
Trace()
Trace()
Trace& operator=(Trace&&)
~Trace()
~Trace()

Godbolt link

// rvo-noncopyable.cpp

#include <iostream>

struct Trace {
  Trace() {
    std::cout << "Trace()" << std::endl;
  }
  ~Trace() {
    std::cout << "~Trace()" << std::endl;
  }

  Trace(const Trace&) = delete;
  Trace& operator=(const Trace&) = delete;
};

Trace f() {
  return Trace{};
}

void g() {
  Trace t = f();
}

int main() {
  g();
}
$ make -B rvo-noncopyable.cxx14.O3.out
clang++ -std=c++14 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o rvo-noncopyable.cxx14.O3.o rvo-noncopyable.cpp
rvo-noncopyable.cpp:18:10: error: call to deleted constructor of 'Trace'
  return Trace{};
         ^~~~~~~
rvo-noncopyable.cpp:13:3: note: 'Trace' has been explicitly marked deleted here
  Trace(const Trace&) = delete;
  ^
rvo-noncopyable.cpp:22:9: error: call to deleted constructor of 'Trace'
  Trace t = f();
        ^   ~~~
rvo-noncopyable.cpp:13:3: note: 'Trace' has been explicitly marked deleted here
  Trace(const Trace&) = delete;
  ^
2 errors generated.
make: *** [Makefile:60: rvo-noncopyable.cxx14.O3.o] Error 1
Snippet 40: Similar to NRVO, in C++14 and older, compiler will not compile code with deleted copy constructor if it exists, even if it’s never called in practice.

However, this has changed since C++17, where RVO is now mandatory. This means that compilers will now need to apply RVO before checking the correctness of your program, and when it compiles, it must elide the copy.

$ make -B rvo-noncopyable.O3.out
clang++ -std=c++20 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable   -c -MMD -o rvo-noncopyable.O3.o rvo-noncopyable.cpp
clang++ -std=c++20 -O3 -g -Wpedantic -Wall -Wextra -Wconversion -Wno-unused-variable -Wno-unused-but-set-variable    rvo-noncopyable.O3.o   -o rvo-noncopyable.O3.out
Snippet 41: Since C++17, this now compiles

5.8 Rules for guaranteed copy elision

Now that we’ve seen some code compile with copy elision and have had their behaviour changed by the compiler, let’s look at the rules for guaranteed copy elision.

Currently, there are two places where the compiler must apply copy elision.

  1. When returning a prvalue
  2. When initialising an object with a prvalue

Here are an example of rule 1 in action:

struct T { int i; };

T f() {
  return T{1};
}

T x;
x = f();

Original program semantics:

In total, the operations performed looks something like this:

T temp{1};
T return_value{std::move(temp)};
x = std::move(return_value);

// After rule 1 is applied, the temporary does not move-construct the
// return value. Instead, the return value is directly constructed with
// the operation that created the prvalue. In total, the operations
// performed looks like this:
T return_value{1};
x = std::move(return_value);

Here’s a slightly larger example:

struct T { int i; };

T g() {
  return T{1};
}

T f() {
  return g();
}

T x;
x = f();

The original program semantics would consist of 1 construction of a temporary T{1} followed by two move-constructed return values, and finally a move-assignment to x.

After rule 1 is applied twice, there is a single construction of a temporary T{1} that’s then move assigned to x.

This way this works is that f() must use the operation that constructs g() to directly construct its return value. In turn, g() must use the operation that constructs T{1} to construct its return value. Thus, the return value of f() is constructed by the operation that constructs T{1}, and so there is only 1 temporary.

Rule 2 works quite similarly. Whenever we have an initialisation of an object where the initialiser expression is a prvalue, the construction of this object must be done with the operation that would have constructed the prvalue.

T x{T{1}};
// Same as
T x{1};

T x = T{1};
// Same as
T x{1};

void f(T x);
f(T{1});
// The parameter x is initialised by T{1}, which is a prvalue.
// So we end up constructing `x` directly with `1`.

T g();
f(g());
// Similarly, the parameter x is initialised by g(), which is a prvalue.
// So whatever operation g used to create the prvalue,
// (which will be some constructor call)
// it will be used to construct `x`.

T x = T{T{1}};
// Same as
T x{T{1}};
// Same as
T x{1};

5.9 How does guaranteed copy elision work?

While rule 2 is rather intuitive – the compiler can simply perform the transformation on the code level, rule 1 seems rather mysterious, as it requires crossing the function boundary in order to perform the construction directly. But when we see how this gets implemented, this will start to make more sense.

Let’s take a particular example and try to dissect it.

Godbolt link

#include <string>

struct T {
  // Here, we have 2 possible ways to construct a T
  T(int);
  T(bool);
};

T g(std::string s);
// g(std::string) may use either constructor

// Possible g #1:
// T g(std::string) { return T(1); }
// Possible g #2:
// T g(std::string) { return T(true); }

T f() {
  // How does f know what operation
  // g will use to construct the prvalue?
  // We don't even know what constructor it will call!
  return g("Hello world");
}

int main() {
  // Whatever that operation is,
  // it needs to be directly called to construct x.
  T x = f();
  (void) x;
}
Snippet 42: Running example for how guaranteed return value optimisation works

Recall from Lecture 1 how large return values work:

Calling convention diagram; in particular, note the return value allocation

It turns out this calling convention is forced whenever a non-trivial class is returned, which happens when you have a class with custom destructor or copy / move constructor, even if the size of the class is small.

Let’s break down the assembly for f, and try to understand the way this works.

f():                                  # @f()

####### Prologue

        push    r14
        push    rbx
        sub     rsp, 40

   #### Copy rdi to rbx (rdi contains pointer to return value allocation)
   #### We need to return the pointer to return value alloc later, in rax

        mov     rbx, rdi

  ##### Construct std::string on stack

        lea     r14, [rsp + 24]
        mov     qword ptr [rsp + 8], r14
        movabs  rax, 8031924123371070792
        mov     qword ptr [rsp + 24], rax
        mov     dword ptr [rsp + 31], 1684828783
        mov     qword ptr [rsp + 16], 11
        mov     byte ptr [rsp + 35], 0

####### Pass pointer to std::string as 2nd parameter `rsi`
   #### (recall 1st parameter `rdi` is still the return value alloc ptr)

        lea     rsi, [rsp + 8]

####### Call g

        call    g(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)

  ##### Destructor for std::string

        mov     rdi, qword ptr [rsp + 8]
        cmp     rdi, r14  # Note r14 is a magic address computed earlier
        je      .LBB2_3
        call    operator delete(void*)
.LBB2_3:

####### Return return value alloc ptr in rax

        mov     rax, rbx

####### Epilogue

        add     rsp, 40
        pop     rbx
        pop     r14
        ret
Snippet 43: Assembly for f shows calling convention for non-trivial return types

Intuitively, f is simply told to “please construct your return value in this location, stored in rdi”, in order for f to implement RVO, all it has to do is pass this location onwards to whatever function actually needs to construct the return value, which in this case, is g.

6 Forwarding references

Full godbolt link

Full code snippet for this section
// main.cpp

#include <iostream>
#include <type_traits>
#include <vector>

template <typename T>
void func(T&&) {
  if (std::is_lvalue_reference_v<T>) {
    // Taken when func is passed lvalue
    std::cout << "T is an lvalue ref\n";
  } else if (std::is_rvalue_reference_v<T>) {
    // This branch is unreachable!
    std::cout << "T is an rvalue ref\n";
  } else {
    // Taken when func is passed rvalue
    std::cout << "T is NOT a ref\n";
  }
}

template <typename K, typename T>
K foozle(T&& arg) {
  return K{std::forward<T>(arg)};
}

template <typename K, typename T>
K foozle_broken(T&& arg) {
  return K{arg};
}

template <typename T>
void forwarding_ref_func(T&& x) {
  std::cout << "got " << x << "\n";
}

int main() {
  int x = 100;
  forwarding_ref_func(x);
  forwarding_ref_func(69);

  {
    std::cout << "<2\n";
    int x = 1;
    func(100);
    func(x);
    std::cout << "2>\n";
  }

  {
    std::cout << "<3\n";
    struct Foo {
      Foo(int a, int b, int c) : x(a), y(b), z(c) {}
      int x, y, z;
    };

    std::vector<Foo> foos{};
    foos.emplace_back(10, 20, 30);

    std::cout << "foos[0]: x="        //
              << foos[0].x            //
              << ", y=" << foos[0].y  //
              << ", z=" << foos[0].z  //
              << "\n";
    std::cout << "3>\n";
  }

  {
    struct Foo {
      Foo(int&) {
        std::cout <<  //
            "lvalue-ref ctor\n";
      }
      Foo(int&&) {
        std::cout <<  //
            "rvalue-ref ctor\n";
      }
    };

    std::cout << "<4\n";
    int x = 0;
    foozle<Foo>(x);
    foozle<Foo>(10);

    std::cout << "------\n";
    foozle_broken<Foo>(x);
    foozle_broken<Foo>(10);
    std::cout << "4>\n";
  }
}

// A is a forwarding ref
template <typename A>
void f1(A&&) {}

// B is not a forwarding ref,
// since it is not a parameter
template <typename B>
B&& f2(int) {}

// C is *NOT* a forwarding ref, as
// it's cv-qualified (const)
template <typename C>
void f3(const C&&) {}

template <typename D>
struct S4 {
  // D is *NOT* a forwarding ref,
  // since f4 itself isn't templated
  void f4(D&&) {}
};

template <typename E>
struct S5 {
  // E is *NOT* a forwarding ref,
  // since S5 (the constructor)
  // is not templated
  S5(E&&) {}
};

While there are only two types of references — lvalue references and rvalue references — there are some advanced ways combine them to implement useful behaviour, which we’ll cover below.

6.1 Reference collapsing rules

In templated code, we might end up with a typedef (or using) that resolves to a reference type; if we then need to take a reference to that type, we might have something like this:

using Foo = int&;
Foo& x = ...;

Since references are not objects (as discussed in Lecture 1), it doesn’t really make sense to take a reference to a reference. So what is the type of x here? Well, we use the reference collapsing rules to find out. Suppose we have some type A that is either T& or T&&, and we want to make either A& or A&&:

  1. If A is T&, then A& is T&, and A&& is also T&
  2. If A is T&&, then A& is T&, and A&& is T&&.

Essentially, lvalue references “always win”, and you can only get an rvalue reference from collapsing two rvalue references.

6.2 What is a forwarding reference?

The real magic (and reason) why reference collapsing exists is to facilitate the creation of forwarding references. Confusingly, the syntax for a forwarding reference is exactly the same as that of an rvalue reference — T&&. The differentiating factor is what kind of type T is.

If T is a template type parameter, and only of the immediately surrounding function template, and it is the type of a function parameter, then it is a forwarding reference. Let’s look at a few scenarios:

// A is a forwarding ref
template <typename A>
void f1(A&&) {}

// B is not a forwarding ref,
// since it is not a parameter
template <typename B>
B&& f2(int) {}
// C is *NOT* a forwarding ref, as
// it's cv-qualified (const)
template <typename C>
void f3(const C&&) {}
template <typename D>
struct S4 {
  // D is *NOT* a forwarding ref,
  // since f4 itself isn't templated
  void f4(D&&) {}
};
template <typename E>
struct S5 {
  // E is *NOT* a forwarding ref,
  // since S5 (the constructor)
  // is not templated
  S5(E&&) {}
};
Snippet 44: Some examples of what is a forwarding reference (but mostly what is not)

So, what is the point of a forwarding reference? It’s simply a mechanism that allows the template type to be inferred as either an lvalue reference or a value (not a reference), depending on the argument type. For example:

CppInsights link

template <typename T>
void forwarding_ref_func(T&& x) {
  std::cout << "got " << x << "\n";
}
  int x = 100;
  forwarding_ref_func(x);
  forwarding_ref_func(69);
Snippet 45: Calling the function with both an rvalue and an lvalue

Note that we get two instantiations of the template, one that takes an lvalue reference, and one that takes an rvalue reference. If we look at the CppInsights link (above), we can indeed see two separate instantiations.

Again, the semantics of forwarding references are not that they are a magical kind of reference that is both an lvalue and rvalue reference, but rather that they allow for T to be deduced as either int& or int — which is why we get two different instantiations of the function template.

In fact, we can confirm this through the error messages:

template <typename T>
void func(T&&) {
  if (std::is_lvalue_reference_v<T>) {
    // Taken when func is passed lvalue
    std::cout << "T is an lvalue ref\n";
  } else if (std::is_rvalue_reference_v<T>) {
    // This branch is unreachable!
    std::cout << "T is an rvalue ref\n";
  } else {
    // Taken when func is passed rvalue
    std::cout << "T is NOT a ref\n";
  }
}
    int x = 1;
    func(100);
    func(x);
$ ./main.out | sed -n '1,/<2/d;/2>/q;p'
T is NOT a ref
T is an lvalue ref

As we can see, when we give an lvalue reference, we deduce T = int&, and when calling with an rvalue reference, the compiler deduces T = int (note: not int&&!). With the reference collapsing rules, this makes the actual parameter types int& and int&& respectively.

6.3 How std::move is implemented

Now that we know about forwarding references, we can take a look at how std::move is implemented:

template <class _Tp>
typename remove_reference<_Tp>::type&& move(_Tp&& __t) {
  using _Up = remove_reference<_Tp>::type;
  return static_cast<_Up&&>(__t);
}
Snippet 46: The implementation of std::move in libc++

It’s as simple as that — just a cast. If the argument is an lvalue, then we infer _Tp = int& (for example), and we will return an rvalue reference to it via the cast. While rvalue references cannot bind to lvalues directly, the explicit cast lets us do that.

If the argument was an rvalue, then we infer _Tp = int, and we cast to an rvalue reference — which does nothing, since the argument was already an rvalue reference.

Either way, we get to return an rvalue reference. Note that we have to manually remove any references from the return type before adding &&, otherwise we would return lvalue references if _Tp was deduced to be an lvalue reference, due to reference collapsing.

7 In-place construction

A neat feature that many of the standard containers support is in-place construction, otherwise known by their function name — emplace. For example, std::vector has emplace_back, std::set has emplace, etc.

Emplacement works by forwarding the arguments to the constructor of the element type, and instead of doing moves or copies, constructs the object in its final resting place. This mechanism is also sometimes known as perfect forwarding.

For example, if we have a type whose construct has 3 parameters, we can use emplace_back to forward the arguments:

    struct Foo {
      Foo(int a, int b, int c) : x(a), y(b), z(c) {}
      int x, y, z;
    };

    std::vector<Foo> foos{};
    foos.emplace_back(10, 20, 30);

    std::cout << "foos[0]: x="        //
              << foos[0].x            //
              << ", y=" << foos[0].y  //
              << ", z=" << foos[0].z  //
              << "\n";
$ ./main.out | sed -n '1,/<3/d;/3>/q;p'
foos[0]: x=10, y=20, z=30

Let’s try to implement something like that for our own types.

7.1 Implementing in-place construction for NullableOwnPtr

You might have noticed that our NullableOwnPtr only lets you default-construct the object, and we cannot pass it any arguments. If our type is not default-constructible (or we don’t want to use the default constructor), then we’re out of luck.

Fortunately, we let NullableOwnPtr’s constructor simply forward the arguments to the constructor of the element type:

  template <typename... Args>
  NullableOwnPtr(Args&&... args) {
    m_ptr = new T{std::forward<Args>(args)...};
  }
Snippet 47: The forwarding constructor for NullableOwnPtr

There’s a bunch of stuff to break down, so let’s take it step by step. First, we declare this constructor to be a variadic template. This lets the function take zero or more arguments, where each argument can be a different type; we do this by using typename....

The “list” of types are stored in the parameter pack, which is Args here. Note that we also make Args a forwarding reference here by using Args&&; this means that the expansion is as-if each parameter were a forwarding reference. For example:

template <typename T1, typename T2, typename T3>
void foo(T1&& a, T2&& b, T3&& c) { /* ... */ }

// later:
// T1 = char&, T2 = int, T3 = double
char a = 'a';
foo(a, 0xb, 3.14);
Snippet 48: An illustration of a variadic template function instantiation

Next, we have std::forward<Args>(args)..., which looks very strange; what this does is forward each argument (hence the name), and here we forward it to the constructor of T by using new T{...}. We have to expand the parameter pack by appending ... to the function call, so that each argument is forwarded individually.

This is somewhat equivalent to this:

new T{std::forward<Args>(args)...};
new T{std::forward<char&>(args[0]), std::forward<int>(args[1])};
Snippet 49: An illustration of how std::forward expands with variadic templates

Note that you can’t actually index into parameter packs this way (since they might have different types), but it illustrates the point. We’ll properly cover variadic templates in the metaprogramming lecture.

At this point, we’ve written the constructor that correctly forwards arguments to the constructor of the actual object. We can verify this works with a non-default-constructible type:

  struct Foo {
    // default constructor is deleted
    Foo(int&& x, int& y) : x(x), y(y) {}

    int x;
    int y;
  };

  int y = 20;
  auto foo = NullableOwnPtr<Foo>(10, y);
  std::cout << "foo = ("       //
            << foo->x << ", "  //
            << foo->y << ")\n";

  // this is a compile error
  // auto bar = NullableOwnPtr<Foo>();
Snippet 50: Demonstrating that our forwarding constructor works
$ ./main.out
foo = (10, 20)

7.1.1 How does std::forward work?

Since rvalue references themselves are actually lvalues, we need a way to preserve (or… forward) the value category of an expression. std::forward does this by using reference collapsing rules, and providing two overloads — one for rvalue references and one for lvalue references.

The definition is something like this:

template <typename T>
T&& forward(std::remove_reference_t<T>& t) {
  return static_cast<T&&>(t);
}
template <typename T>
T&& forward(std::remove_reference_t<T>&& t) {
  return static_cast<T&&>(t);
}
Snippet 51: Possible implementation of std::forward

Just like std::move, std::forward is literally just a static_cast into a T&&. But where they differ is in the placement of the forwarding reference type. For std::forward, we are not using the argument to deduce T (which is why you need to manually give the template argument).

Instead, we explicitly remove the references from the template type (using the type_traits function std::remove_reference_t — covered later), and then have an overload taking &, and one taking && — lvalue and rvalue references respectively. There are no forwarding references here!

The return type is then either an lvalue reference or an rvalue reference, depending on the reference collapsing rules. If the original argument was an lvalue reference, then we get an lvalue reference. If it was an rvalue reference, then we get an rvalue reference.

For example, let’s look at a function (foozle) that simply forwards a single argument to a type K (which we specify), and a type Foo that has a constructor for an lvalue reference and an rvalue reference. We can contrast this with a function that doesn’t forward correctly (foozle_broken):

template <typename K, typename T>
K foozle(T&& arg) {
  return K{std::forward<T>(arg)};
}

template <typename K, typename T>
K foozle_broken(T&& arg) {
  return K{arg};
}
    struct Foo {
      Foo(int&) {
        std::cout <<  //
            "lvalue-ref ctor\n";
      }
      Foo(int&&) {
        std::cout <<  //
            "rvalue-ref ctor\n";
      }
    };

Let’s run it first:

    int x = 0;
    foozle<Foo>(x);
    foozle<Foo>(10);

    std::cout << "------\n";
    foozle_broken<Foo>(x);
    foozle_broken<Foo>(10);
$ ./main.out | sed -n '1,/<4/d;/4>/q;p'
lvalue-ref ctor
rvalue-ref ctor
------
lvalue-ref ctor
lvalue-ref ctor
Snippet 52: Demonstrating how std::forward works

As we might expect from the name, foozle_broken is broken, and it calls the lvalue constructor of Foo in both cases. Let’s walkthrough how std::forward works in this instance, for foozle.

In the first case, T is deduced to be int&, and the argument type is also int&. This selects the first overload of std::forward (taking an lvalue reference); reference collapsing turns static_cast<int& &&>, into static_cast<int&>, which does nothing (since the argument is already an lvalue reference). The return type undergoes the same collapsing rules, and the final result is an lvalue reference.

In the second case, we actually also select the first overload; the expression arg is an lvalue even if its type is an rvalue reference. However, since T was deduced to be int (via forwarding reference), the return type of std::forward is now int&&, which is an rvalue reference.

This sidesteps the issue where rvalue references, when named, are actually lvalues; since it is the result of an expression, using std::forward here keeps it as an rvalue reference.

When is the second overload of std::forward used?

Since both instances above call the first overload of std::forward, you might wonder when the second one is selected, or whether or not it’s even necessary. The answer is that, in 99% of cases, the first overload is used.

The second one is used only if the argument to std::forward isn’t the name of an rvalue reference, but rather an actual rvalue.

7.1.2 Moving vs forwarding

It is important to note that std::move and std::forward do very different things! Moving will always give you an rvalue reference, either by converting an lvalue reference to an rvalue one, or by keeping the rvalue reference.

On the other hand, forwarding will preserve the value category of the thing that you’re forwarding.

7.2 Implementing emplace_back for SimpleVector

Now it should actually be trivial to implement emplace_back for SimpleVector by taking the lessons we learnt above:

  template <typename... Args>
  void emplace_back(Args&&... args) {
    auto new_buffer = make_new_buffer(m_size + 1);
    new_buffer[m_size] = ElemTy(std::forward<Args>(args)...);

    m_size += 1;
    delete[] m_buffer;
    m_buffer = new_buffer;
  }
Snippet 53: Implementation of emplace_back for SimpleVector

Let’s try it:

  struct Foo {
    Foo() {}
    Foo(int x, int y) : x(x), y(y) {}

    int x;
    int y;
  };

  SimpleVector<Foo> foos{};
  foos.emplace_back(69, 420);

  std::cout << "foo = ("          //
            << foos[0].x << ", "  //
            << foos[0].y << ")\n";
$ ./simplevector.out
foo = (69, 420)
Snippet 54: Demonstration of emplace_back for SimpleVector
Why does Foo still need a default constructor?

A keen observer might notice that we had to define a default constructor for Foo; if we didn’t, then we get a compile error. This is because our SimpleVector still default constructs elements when we make a new buffer.

Avoiding this while staying clear of undefined behaviour is quite involved, so we’ll cover it later; for now, just know that an actual std::vector obviously has a superior implementation compared to ours.

7.3 Footnote on push / insert vs emplace

Be careful when using emplace when you mean to push / insert and vice versa. While it might sound like we want to emplace as much as possible for all the performance benefits it brings, sometimes it can be rather error prone.

std::vector<std::vector<int>> list_of_lists{{1}, {4, 5, 6}};

// Let's say I want to make mat look like {{1, 2, 3}, {4, 5, 6}}

// mat.push_back(2);
// Compile error! I forgot to specify `mat[0]`

mat[0].push_back(2);
// mat looks like {{1, 2}, {4, 5, 6}} now

// Suppose I used emplace_back, but made the same mistake
mat.emplace_back(3);
// No compile error...? What does this do?

// mat looks like {{1, 2}, {4, 5, 6}, {0, 0, 0}} now

// This actually calls the std::vector<int>(size_t) constructor!
//
// > 4) Constructs the container with count default-inserted
// >    instances of T. No copies are made.
//
// That's definitely not what I meant to do
Snippet 55: Using the wrong type by accident

In general, using emplace should only be used when you know you’ll gain some performance benefit from in-place construction, or if your type is non-movable and has to be constructed in-place. While it is faster in these cases, it is not any faster in many cases, and as demonstrated above, can sometimes be rather error prone.

push and insert on the other hand are quite reliable. If we look at the type signature for std::vector<T>::push_back:

constexpr void push_back( const T& value );
constexpr void push_back( T&& value );

We can see that it simply takes in a T directly and will simply insert that T. If we don’t pass in exactly a T, we will get a compile error, like in the scenario we showed above.

On the other hand, in order to know what parameters emplace takes, we need to know what parameters the constructors of T take. Sometimes, T may accept many different kinds of parameters as input, and this might be more than we expect.

Here are some situations where you probably just want to stick to push / insert:

// 1) Copy / move is fast enough:
std::vector<std::string> v1;
v1.push_back("Hello world");

std::vector<int> v2;
v2.push_back(1);
// 2) You already have a local T, and you just want to insert that.
std::vector<T> bazaar;

for (...) {
  T x;
  x.m = 5;
  x.foozle();
  x.barbar(y);
  // mutate x a lot

  // just use push_back!
  bazaar.push_back(std::move(x));

  // emplace_back would simply call move ctor anyway, so it is useless
  // bazaar.emplace_back(std::move(x));
}
// 3) In general, if push_back and emplace_back
//    would work with the same arguments, then just use push_back.
//    That usually means you're simply invoking some
//    copy / move constructor or implicit conversion.
//
//    Our earlier examples fall into this category.
//    https://abseil.io/tips/112

On the other hand, here are some situations where you want to consider emplace:

// 1) If you need to, because the type is not copyable / movable
std::list<std::mutex> mutexes;
std::mutex m{};
// mutexes.push_back(m);                // Cannot move
// mutexes.push_back(std::mutex{});     // Cannot move
// mutexes.emplace_back(std::mutex{});  // Cannot move
mutexes.emplace_back();  // Finally compiles
// 2) If you find yourself creating a temporary just to insert an object.
//    Especially if move is expensive!
std::vector<std::vector<int>> mat;

// Unnecessary move
mat.push_back(std::vector<int>{1, 2, 3});
// Note the above is the same as:
mat.push_back(std::vector<int>(std::initializer_list<int>{1, 2, 3}));

// No move, simply in-place construction
mat.emplace_back(std::initializer_list<int>{1, 2, 3});


std::vector<std::array<char, 4096>> pages;

// Creates a temporary of 4096 zeroes, then copies that
// In total, 8192 bytes had to be written
pages.push_back(std::array<char, 4096>{});

// No move, simply in-place construction (of 4096 zeroes)
// In total, 4096 bytes had to be written
// This is super cheap
pages.emplace_back();

// https://quick-bench.com/q/U4NupTHeoyuHU7rHP4SGHqMXbeo

7.3.1 Summary of push vs emplace

The main considerations for choosing between push and emplace are:

  1. Whether emplace is necessary (ie. the type is non-copyable and non-movable)
  2. Whether hidden bugs can be introduced when the wrong one is used (eg. accidentally calling a constructor that you didn’t mean to)
  3. Whether there are performance benefits to using emplace (eg. if move is already cheap, then the benefit is minimal)

8 References

© 30 June 2022, Thomas Tan, Ng Zhia Yang, All Rights Reserved

^

dummy