src/part06.rs

   1 // Rust-101, Part 06: Copy
   2 // =======================
   3
   4 use part05::BigInt;
   5
   6 // With `BigInt` being about numbers, we should be able to write a version of `vec_min`
   7 // that computes the minimum of a list of `BigInt`. We start by writing `min` for
   8 // `BigInt`. Now our assumption of having no trailing zeros comes in handy!
   9 impl BigInt {
  10     fn min(self, other: Self) -> Self {
  11         // Just to be sure, we first check that both operands actually satisfy our invariant.
  12         // `debug_assert!` is a macro that checks that its argument (must be of type `bool`)
  13         // is `true`, and panics otherwise. It gets removed in release builds, which you do with
  14         // `cargo build --release`.
  15         debug_assert!(self.test_invariant() && other.test_invariant());
  16         // If the lengths of the two numbers differ, we already know which is larger.
  17         if self.data.len() < other.data.len() {
  18             self
  19         } else if self.data.len() > other.data.len() {
  20             other
  21         } else {
  22             // **Exercise 05.1**: Fill in this code.
  23             panic!("Not yet implemented.");
  24         }
  25     }
  26 }
  27
  28 // Now we can write `vec_min`. In order to make it type-check, we have to write it as follows.
  29 fn vec_min(v: &Vec<BigInt>) -> Option<BigInt> {
  30     let mut min: Option<BigInt> = None;
  31     for e in v {
  32         min = Some(match min {
  33             None => e.clone(),
  34             Some(n) => e.clone().min(n)
  35         });
  36     }
  37     min
  38 }
  39 // Now, what's happening here? Why do we have to write `clone()`, and why did we not
  40 // have to write that in our previous version?
  41 //
  42 // The answer is already hidden in the type of `vec_min`: `v` is just borrowed, but
  43 // the Option<BigInt> that it returns is *owned*. We can't just return one of the elements of `v`,
  44 // as that would mean that it is no longer in the vector! In our code, this comes up when we update
  45 // the intermediate variable `min`, which also has type `Option<BigInt>`. If you replace `e.clone()`
  46 // in the `None` arm with `*e`, Rust will complain "Cannot move out of borrowed content". That's because
  47 // `e` is a `&BigInt`. Assigning `min = Some(*e)` works just like a function call: Ownership of the
  48 // underlying data is transferred from where `e` borrows from to `min`. But that's not allowed, since
  49 // we just borrowed `e`, so we cannot empty it! We can, however, call `clone()` on it. Then we own
  50 // the copy that was created, and hence we can store it in `min`.<br/>
  51 // Of course, making such a full copy is expensive, so we'd like to avoid it. We'll come to that soon.
  52
  53 // ## `Copy` types
  54 // But before we go there, I should answer the second question I brought up above: Why did our old `vec_min` work?
  55 // We stored the minimal `i32` locally without cloning, and Rust did not complain. That's because there isn't
  56 // really much of an "ownership" when it comes to types like `i32` or `bool`: If you move the value from one
  57 // place to another, then both instances are "complete". We also say the value has been *duplicated*. This is in
  58 // stark contrast to types like `Vec<i32>`, where moving the value results in both the old and the new vector
  59 // pointing to the same underlying buffer. We don't have two vectors, there's no duplication.
  60 //
  61 // Rust calls types that can be freely duplicated `Copy` types. `Copy` is another trait, and it
  62 // is implemented for types like `i32` and `bool`. Remember how we defined the trait `Minimum` by writing
  63 // `trait Minimum : Copy { ...`? This tells Rust that every type that implements `Minimum` must also
  64 // implement `Copy`, and that's why the compiler accepted our generic `vec_min` in part 02.
  65 // `Copy` is the first *marker trait* that we encounter: It does not provide any methods, but
  66 // makes a promise about the behavior of the type - in this case, being duplicable.
  67
  68 // If you try to implement `Copy` for `BigInt`, you will notice that Rust
  69 // does not let you do that. A type can only be `Copy` if all its elements
  70 // are `Copy`, and that's not the case for `BigInt`. However, we can make
  71 // `SomethingOrNothing<T>` `Copy` if `T` is `Copy`.
  72 use part02::{SomethingOrNothing,Something,Nothing};
  73 impl<T: Copy> Copy for SomethingOrNothing<T>{}
  74 // Again, Rust can generate implementations of `Copy` automatically. If
  75 // you add `#[derive(Copy,Clone)]` right before the definition of `SomethingOrNothing`,
  76 // both `Copy` and `Clone` will automatically be implemented.
  77
  78 // ## An operational perspective
  79 // Instead of looking at what happens "at the surface" (i.e., visible in Rust), one can also explain
  80 // ownership passing and how `Copy` and `Clone` fit by looking at what happens on the machine.<br/>
  81 // When Rust code is executed, passing a value (like `i32` or `Vec<i32>`) to a function will always
  82 // result in a shallow copy being performed: Rust just copies the bytes representing that value, and
  83 // considers itself done. That's just like the default copy constructor in C++. Rust, however, will
  84 // consider this a destructive operation: After copying the bytes elsewhere, the original value must
  85 // no longer be used. After all, the two could not share a pointer! If, however, you mark a type `Copy`,
  86 // then Rust will *not* consider a move destructive, and just like in C++, the old and new value
  87 // can happily coexist. Now, Rust does not allow you to overload the copy constructor. This means that
  88 // passing a value around will always be a fast operation, no allocation or any other kind of heap access
  89 // will happen. In the situations where you would write a copy constructor in C++ (and hence
  90 // incur a hidden cost on every copy of this type), you'd have the type *not* implement `Copy`, but only
  91 // `Clone`. This makes the cost explicit.
  92
  93 // ## Lifetimes
  94 // To fix the performance problems of `vec_min`, we need to avoid using `clone()`. We'd like
  95 // the return value to not be owned (remember that this was the source of our need for cloning), but *borrowed*.
  96
  97 // This is demonstrated by the function `head` that borrows the first element of a vector if it is non-empty.
  98 // The type of the function says that it will either return nothing, or it will return a borrowed `T`.
  99 // We can then borrow the first element of `v` and use it to construct the return value.
 100 fn head<T>(v: &Vec<T>) -> Option<&T> {
 101     if v.len() > 0 {
 102         Some(&v[0])
 103     } else {
 104         None
 105     }
 106 }
 107
 108 // Now, coming back to `head` - here, we are returning a pointer to the first element. But doesn't
 109 // that mean that callers have to be careful? Imagine `head` would be a C++ function, and we would
 110 // write the following code.
 111 /*
 112   int foo(std::vector<int> v) {
 113     int *first = head(v);
 114     v.push_back(42);
 115     return *first;
 116   }
 117 */
 118 // This is very much like our very first motivating example for ownership, at the beginning of part 04.
 119 // But this time, the bug is hidden behind the call to `head`. How does Rust solve this? If we translate
 120 // the code above to Rust, it doesn't compile, so clearly we are good - but how and why?
 121 // (Notice that we have to explicitly assert using `unwrap` that `first` is not `None`, whereas the C++ code
 122 // above would silently dereference a `NULL`-pointer. But that's another point.)
 123 fn rust_foo(mut v: Vec<i32>) -> i32 {
 124     let first: Option<&i32> = head(&v);
 125     /* v.push(42); */
 126     *first.unwrap()
 127 }
 128
 129 // To give the answer to this question, we have to talk about the *lifetime* of a borrow. The point is, saying that
 130 // you borrowed your friend a `Vec<i32>`, or a book, is not good enough, unless you also agree on *how long*
 131 // your friend can borrow. After all, you need to know when you can rely on owning your data (or book) again.
 132 //
 133 // Every borrow in Rust has an associated lifetime. The full type of `head` reads as follows:
 134 // `fn<'a, T>(&'a Vec<T>) -> Option<&'a T>`. Here, `'a` is a *lifetime variable*, which represents how long the vector has
 135 // been borrowed. The function type expresses that argument and return value have *the same lifetime*.
 136 //
 137 // When analyzing the code of `rust_foo`, Rust has to assign a lifetime to `first`. It will choose the scope
 138 // where `first` is valid, which is the entire rest of the function. Because `head` ties the lifetime of its
 139 // argument and return value together, this means that `&v` also has to borrow `v` for the entire duration of
 140 // the function. So when we try to borrow `v` mutably for `push`, Rust complains that the two borrows (the one
 141 // for `head`, and the one for `push`) overlap. Lucky us! Rust caught our mistake and made sure we don't crash the program.
 142 //
 143 // So, to sum this up: Lifetimes enable Rust to reason about *how long* a pointer has been borrowed. We can thus
 144 // safely write functions like `head`, that return pointers into data they got as argument, and make sure they
 145 // are used correctly, *while looking only at the function type*. At no point in our analysis of `rust_foo` did
 146 // we have to look *into* `head`. That's, of course, crucial if we want to separate library code from application code.
 147 // Most of the time, we don't have to explicitly add lifetimes to function types. This is thanks to *lifetime elision*,
 148 // where Rust will automatically insert lifetimes we did not specify, following some [simple, well-documented rules](http://doc.rust-lang.org/stable/book/lifetimes.html#lifetime-elision).
 149
 150 // [index](main.html) | [previous](part05.html) | [next](main.html)