workspace/src/part09.rs

   1 // Rust-101, Part 09: Iterators (WIP)
   2 // ==================================
   3
   4 use part05::BigInt;
   5
   6 // In the following, we will look into the iterator mechanism of Rust and make our `BigInt` compatible
   7 // with `for` loops. Of course, this is all about implementing particular traits again. In particular,
   8 // an iterator is something that implements the `Iterator` trait. As you can see in [the documentation](http://doc.rust-lang.org/beta/std/iter/trait.Iterator.html),
   9 // this trait mandates a single function `next` returning an `Option<Self::Item>`, where `Item` is an
  10 // associated type chosen by the implementation. (There are many more methods provided for `Iterator`,
  11 // but they all have default implementations, so we don't have to worry about them right now).
  12 //
  13 // For the case of `BigInt`, we want our iterator to iterate over the digits in normal, notational order: The most-significant
  14 // digit comes first. So, we have to write down some type, and implement `Iterator` for it such that `next` returns the digits
  15 // one-by-one. Clearly, the iterator must somehow be able to access the number it iterates over, and it must store its current
  16 // location. However, it cannot *own* the `BigInt`, because then the number would be gone after iteration! That'd certainly be bad.
  17 // The only alternative is for the iterator to *borrow* the number.
  18
  19 // In writing this down, we again have to be explicit about the lifetime of the borrow: We can't just have an
  20 // `Iter`, we must have an `Iter<'a>` that borrowed the number for lifetime `'a`. This is our first example of
  21 // a datatype that's polymorphic in a lifetime, as opposed to a type. <br/>
  22 // `usize` here is the type of unsigned, pointer-sized numbers. It is typically the type of "lengths of things",
  23 // in particular, it is the type of the length of a `Vec` and hence the right type to store an offset into the vector of digits.
  24 struct Iter<'a> {
  25     num: &'a BigInt,
  26     idx: usize, // the index of the last number that was returned
  27 }
  28
  29 // Now we are equipped to implement `Iterator` for `Iter`.
  30 impl<'a> Iterator for Iter<'a> {
  31     // We choose the type of things that we iterate over to be the type of digits, i.e., `u64`.
  32     type Item = u64;
  33
  34     fn next(&mut self) -> Option<u64> {
  35         // First, check whether there's any more digits to return.
  36         if self.idx == 0 {
  37             // We already returned all the digits.
  38             unimplemented!()
  39         } else {
  40             // Decrement, and return next digit.
  41             unimplemented!()
  42         }
  43     }
  44 }
  45
  46 // All we need now is a function that creates such an iterator for a given `BigInt`.
  47 impl BigInt {
  48     // Notice that when we write the type of `iter`, we don't actually have to give the lifetime parameter of `Iter`. Just as it is
  49     // the case with functions returning borrowed data, you can elide the lifetime. The rules for adding the lifetimes are exactly the
  50     // same. (See the last section of [part 06](part06.html).)
  51     fn iter(&self) -> Iter {
  52         unimplemented!()
  53     }
  54 }
  55
  56 // We are finally ready to iterate! Remember to edit `main.rs` to run this function.
  57 pub fn main() {
  58     let b = BigInt::new(1 << 63) + BigInt::new(1 << 16) + BigInt::new(1 << 63);
  59     for digit in b.iter() {
  60         println!("{}", digit);
  61     }
  62 }
  63
  64 // Of course, we don't have to use `for` to apply the iterator. We can also explicitly call `next`.
  65 fn print_digits_v1(b: &BigInt) {
  66     let mut iter = b.iter();
  67     // `loop` is the keyword for a loop without a condition: It runs endlessly, or until you break out of
  68     // it with `break` or `return`.
  69     loop {
  70         // Each time we go through the loop, we analyze the next element presented by the iterator - until it stops.
  71         match iter.next() {
  72             None => break,
  73             Some(digit) => println!("{}", digit)
  74         }
  75     }
  76 }
  77
  78 // Now, it turns out that this combination of doing a loop and a pattern matching is fairly common, and Rust
  79 // provides some convenient syntactic sugar for it.
  80 fn print_digits_v2(b: &BigInt) {
  81     let mut iter = b.iter();
  82     // `while let` performs the given pattern matching on every round of the loop, and cancels the loop if the pattern
  83     // doesn't match. There's also `if let`, which works similar, but of course without the loopy part.
  84     while let Some(digit) = iter.next() {
  85         println!("{}", digit)
  86     }
  87 }
  88
  89 // ## Iterator invalidation and lifetimes
  90 // You may have been surprised that we had to explicitly annotate a lifetime when we wrote `Iter`. Of
  91 // course, with lifetimes being present at every borrow in Rust, this is only consistent. But do we at
  92 // least gain something from this extra annotation burden? (Thankfully, this burden only occurs when we
  93 // define *types*, and not when we define functions - which is typically much more common.)
  94 //
  95 // It turns out that the answer to this question is yes! This particular aspect of the concept of
  96 // lifetimes helps Rust to eliminate the issue of *iterator invalidation*. Consider the following
  97 // piece of code.
  98 fn iter_invalidation_demo() {
  99     let mut b = BigInt::new(1 << 63) + BigInt::new(1 << 16) + BigInt::new(1 << 63);
 100     for digit in b.iter() {
 101         println!("{}", digit);
 102         /*b = b + BigInt::new(1);*/                                 /* BAD! */
 103     }
 104 }
 105 // If you enable the bad line, Rust will reject the code. Why? The problem is that we are modifying the
 106 // number while iterating over it. In other languages, this can have all sorts of effects from inconsistent
 107 // data or throwing an exception (Java) to bad pointers being dereferenced (C++). Rust, however, is able to
 108 // detect this situation. When you call `iter`, you have to borrow `b` for some lifetime `'a`, and you obtain
 109 // `Iter<'a>`. This is an iterator that's only valid for lifetime `'a`. Luckily, we have this annotation available
 110 // to make such a statement. Now, since we are using the iterator throughout the loop, `'a` has to span the loop.
 111 // Thus `b` is borrowed for the duration of the loop, and we cannot mutate it. This is yet another example of
 112 // how the combination of mutation and aliasing leads to undesired effects (not necessarily crashes, like in Java),
 113 // which Rust successfully prevents.
 114 //
 115 // Technically speaking, there's one more subtlety that I did not explain yet. We never explicitly tied the lifetime `'a` of the
 116 // iterator to the loop, so how does this happen? The answer lies in the full type of `next()`:
 117 // `fn<'a, 'b>(&'b mut Iter<'a>) -> Option<u64>`. Since `next()` takes a *borrowed* iterator, there are two lifetimes involved:
 118 // The lifetime of the borrow of the iterator, and the lifetime of the iterator itself. In such a case of nested lifetimes,
 119 // Rust implicitly adds the additional constraint that the inner lifetime *outlives* the outer one: The borrow of an iterator
 120 // cannot be valid for longer than the iterator itself is valid. This means that the lifetime `'a` of the iterator needs
 121 // to outlive every call to `next()`, and hence the loop. Luckily, this all happens without our intervention.
 122
 123