src/part16.rs

   1 // Rust-101, Part 16: Unsafe Rust, Drop
   2 // ====================================
   3
   4 use std::ptr;
   5 use std::mem;
   6 use std::marker::PhantomData;
   7
   8 //@ As we saw, the rules Rust imposes to ensure memory safety can get us pretty far. A large amount
   9 //@ of programming patterns can be written within safe Rust, and, more importantly, library code
  10 //@ like iterators or threads can make use of the type system to ensure some level of correctness
  11 //@ beyond basic memory safety.
  12 //@
  13 //@ However, there will still be programs that one cannot write in accordance with the borrow
  14 //@ checker. And there will be cases where it may be possible to satisfy the compiler, but only at
  15 //@ the cost of some run-time overhead, as we saw with `RefCell` - overhead which may not be
  16 //@ acceptable. In such a situation, it is possible to use *unsafe* Rust: That's a part of the
  17 //@ language that is *known* to open the gate to invalid pointer access and all other sorts of
  18 //@ memory unsafety. Of course, `unsafe` also means "Here Be Dragons": You are on your own now. <br/>
  19 //@ The goal in these cases is to confine unsafety to the local module. Types like `Rc` and `Vec`
  20 //@ are implemented using unsafe Rust, but *using* them as we did is (believed to be) perfectly
  21 //@ safe.
  22 //@
  23 //@ ## Unsafe Code
  24 //@ As an example, let us write a doubly-linked list. Clearly, such a data-structure involves
  25 //@ aliasing and mutation: Every node in the list is pointed to by its left and right neighbor, but
  26 //@ still we will want to modify the nodes. We could now try some clever combination of `Rc` and
  27 //@ `RefCell`, but this would end up being quite annoying - and it would incur some overhead. For a
  28 //@ low-level data-structure like a doubly-linked list, it makes sense to implement an efficient
  29 //@ version once, that is unsafe internally, but that can be used without any risk by safe client
  30 //@ code.
  31
  32 //@ As usual, we start by defining the types. Everything is parameterized by the type `T` of the
  33 //@ data stored in the list.
  // The links between nodes are *mutable raw pointers* to nodes.
  //@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike
  //@ references, they do not come with any guarantees: Raw pointers can be null, or they can point
  //@ to garbage. They don't have a lifetime, either.
  type NodePtr<T> = *mut Node<T>;

  // A node of the list stores one element inline, together with one node pointer for the
  // predecessor and one for the successor.
  struct Node<T> {
      data: T,
      prev: NodePtr<T>,
      next: NodePtr<T>,
  }
  45
  46 // The linked list itself stores pointers to the first and the last node. In addition, we tell Rust
  47 // that this type will own data of type `T`.
  48 //@ The type `PhantomData<T>` does not actually store anything in memory - it has size zero.
  49 //@ However, logically, Rust will consider a `T` to be present. In this case, Rust knows that data
  50 //@ of type `T` may be dropped whenever a `LinkedList<T>` is dropped. Dropping has a lot of subtle
  51 //@ checks to it, making sure that things can't go wrong. For this to work, Rust needs to know
  52 //@ which types could potentially be dropped. In safe Rust, this can all be inferred automatically,
  53 //@ but here, we just have a `*mut Node<T>`, and we need to tell Rust that we actually own such
  54 //@ data and will drop it.
  55 //@ (For more of the gory details, see
  56 //@ [this RFC](https://github.com/rust-lang/rfcs/blob/master/text/0769-sound-generic-drop.md).)
  57 pub struct LinkedList<T> {
  58     first: NodePtr<T>,
  59     last:  NodePtr<T>,
  60     _marker: PhantomData<T>,
  61 }
  62
  //@ Before we get to the actual linked-list methods, we write two short helper functions
  //@ converting between mutable raw pointers, and boxed data. It may be tempting to reach for
  //@ `mem::transmute` here, which can convert anything to anything, by just re-interpreting the
  //@ bytes. Clearly, that's an unsafe operation and must only be used with great care - or even
  //@ better, not at all. Seriously. If at all possible, you should never use `transmute`. <br/>
  //@ It would also force us to assume that a `Box` and a raw pointer have the same representation
  //@ in memory. Fortunately, the standard library
  //@ [provides](https://doc.rust-lang.org/std/boxed/struct.Box.html#method.from_raw) exactly these
  //@ [operations](https://doc.rust-lang.org/std/boxed/struct.Box.html#method.into_raw), so our
  //@ helpers merely forward to them.

  //@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function
  //@ is not generally safe. This grants us the unsafe powers for the body of the function: We can
  //@ dereference raw pointers, and - most importantly - we can call unsafe functions. (The other
  //@ unsafe powers won't be relevant here. Go read
  //@ [The Rustonomicon](https://doc.rust-lang.org/nightly/nomicon/) if you want to learn all about
  //@ this, but be warned - That Way Lies Madness.) <br/>
  //@ Here, the caller will have to ensure that `r` is a valid pointer, and that nobody else has a
  //@ pointer to this data.
  // Takes back ownership of the allocation behind `r`.
  // Safety: `r` must come from `box_into_raw` (that is, from `Box::into_raw`), and it must not be
  // used, or converted into a box a second time, after this call.
  unsafe fn raw_into_box<T>(r: *mut T) -> Box<T> {
      Box::from_raw(r)
  }
  //@ The case is slightly different for `box_into_raw`: Converting a `Box` to a raw pointer is
  //@ always safe. It just drops some information. Hence `Box::into_raw` is a safe function, and so
  //@ is our helper. When a safe function does need unsafe operations internally, it uses an
  //@ *unsafe block*: This is an (unchecked) promise to the Rust compiler, saying that a safe
  //@ invocation of the function cannot go wrong. Inside such a block, we also have the unsafe
  //@ powers. We will see one in `push_back`.
  // Gives up ownership of the box: the allocation is not freed until the returned pointer is
  // passed to `raw_into_box` again.
  fn box_into_raw<T>(b: Box<T>) -> *mut T {
      Box::into_raw(b)
  }
  93
  94 impl<T> LinkedList<T> {
  95     // A new linked list just contains null pointers. `PhantomData` is how we construct any
  96     // `PhantomData<T>`.
  97     pub fn new() -> Self {
  98         LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData }
  99     }
 100
 101     // This function adds a new node to the end of the list.
 102     pub fn push_back(&mut self, t: T) {
 103         // Create the new node, and make it a raw pointer.
 104         //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want
 105         //@ the memory that it points to to be deallocated!
 106         let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } );
 107         let new = box_into_raw(new);
 108         // Update other pointers to this node.
 109         if self.last.is_null() {
 110             debug_assert!(self.first.is_null());
 111             // The list is currently empty, so we have to update the head pointer.
 112             self.first = new;                                       /*@*/
 113         } else {
 114             debug_assert!(!self.first.is_null());
 115             // We have to update the `next` pointer of the tail node.
 116             //@ Since Rust does not know that a raw pointer actually points to anything,
 117             //@ dereferencing such a pointer is an unsafe operation. So this unsafe block promises
 118             //@ that the pointer will actually be valid.
 119             unsafe { (*self.last).next = new; }                     /*@*/
 120         }
 121         // Make this the last node.
 122         self.last = new;
 123     }
 124
 125     // **Exercise 16.1**: Add some more operations to `LinkedList`: `pop_back`, `push_front` and
 126     // `pop_front`. Add testcases for `push_back` and all of your functions. The `pop` functions
 127     // should take `&mut self` and return `Option<T>`.
 128
 129     // Next, we are going to provide an iterator.
 130     //@ This function just creates an instance of `IterMut`, the iterator type which does the actual
 131     //@ work.
 132     pub fn iter_mut(&mut self) -> IterMut<T> {
 133         IterMut { next: self.first, _marker: PhantomData  }
 134     }
 135 }
 136
 137 //@ What does the iterator need to store? Strictly speaking, all it needs is the pointer to the
 138 //@ next node that it is going to visit. However, how do we make sure that this pointer remains
 139 //@ valid? We have to get this right ourselves, as we left the safe realms of borrowing and
 140 //@ ownership. Remember that the key ingredient for iterator safety was to tie the lifetime of the
 141 //@ iterator to the lifetime of the reference used for `iter_mut`. We will thus give `IterMut` two
 142 //@ parameters: A type parameter specifying the type of the data, and a lifetime parameter
 143 //@ specifying for how long the list was borrowed to the iterator.
 144
 145 //@ For Rust to accept the type, we have to add two more annotations. First of all, we have to
 146 //@ ensure that the data in the list lives at least as long as the iterator: If you drop the `T:
 147 //@ 'a`, Rust will tell you to add it back. And secondly, Rust will complain if `'a` is not
 148 //@ actually used in the struct. It doesn't know what it is supposed to do with that lifetime. So
 149 //@ we use `PhantomData` again to tell it that in terms of ownership, this type actually (uniquely)
 150 //@ borrows a linked list. This has no operational effect, but it means that Rust can deduce the
 151 //@ intent we had when adding that seemingly useless lifetime parameter.
 152 pub struct IterMut<'a, T> where T: 'a {
 153     next: NodePtr<T>,
 154     _marker: PhantomData<&'a mut LinkedList<T>>,
 155 }
 156
 157 //@ When implementing `Iterator` for `IterMut`, the fact that we have the lifetime `'a` around
 158 //@ immediately pays off: We would not even be able to write down the type `Item` without that
 159 //@ lifetime.
 160 impl<'a, T> Iterator for IterMut<'a, T> {
 161     type Item = &'a mut T;
 162
 163     fn next(&mut self) -> Option<Self::Item> {
 164         // The actual iteration is straight-forward: Once we reached a null pointer, we are done.
 165         if self.next.is_null() {
 166             None
 167         } else {
 168             // Otherwise, we can convert the next pointer to a reference, get a reference to the data
 169             // and update the iterator.
 170             let next = unsafe { &mut *self.next };
 171             let ret = &mut next.data;
 172             self.next = next.next;                                  /*@*/
 173             Some(ret)                                               /*@*/
 174         }
 175     }
 176 }
 177
 178 //@ In `next` above, we made crucial use of the assumption that `self.next` is either null or a
 179 //@ valid pointer. This only works because if someone tries to delete elements from a list during
 180 //@ iteration, we know that the borrow checker will catch them: If they call `next`, the lifetime
 181 //@ `'a` we artificially added to the iterator has to still be active, which means the mutable
 182 //@ reference passed to `iter_mut` is still active, which means nobody can delete anything from the
 183 //@ list. In other words, we make use of the expressive type system of Rust, decorating our own
 184 //@ unsafe implementation with just enough information so that Rust can check *uses* of the linked-
 185 //@ list. If the type system were weaker, we could not write a linked-list like the above with a
 186 //@ safe interface!
 187
 188 // **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared
 189 // references. Add testcases for both kinds of iterators.
 190
 191 // ## `Drop`
 192 //@ The linked list we wrote is already working quite nicely, but there is one problem: When the
 193 //@ list is dropped, nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself
 194 //@ has a destructor that needs to clean up, it is not called for the elements remaining in the
 195 //@ list. We need to take care of that ourselves.
 196
 197 //@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a
 198 //@ very special trait. It can only be implemented for *nominal types*, i.e., you cannot implement
 199 //@ `Drop` for `&mut T`. You also cannot restrict the type and lifetime parameters further than the
 200 //@ type does - the `Drop` implementation has to apply to *all* instances of `LinkedList`.
 201 impl<T> Drop for LinkedList<T> {
 202     // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own
 203     // `self`, because then the destructor of `self` would be called at the end of the function,
 204     // resulting in endless recursion.
 205     fn drop(&mut self) {
 206         let mut cur_ptr = self.first;
 207         while !cur_ptr.is_null() {
 208             // In the destructor, we just iterate over the entire list, successively obtaining
 209             // ownership (`Box`) of every node. When the box is dropped, it will call the destructor
 210             // on `data` if necessary, and subsequently free the node on the heap.
 211             //@ We call `drop` explicitly here just for documentation purposes.
 212             let cur = unsafe { raw_into_box(cur_ptr) };
 213             cur_ptr = cur.next;
 214             drop(cur);
 215         }
 216     }
 217 }
 218
 219 // ## The End
 220 //@ Congratulations! You completed Rust-101. This was the last part of the course. I hope you
 221 //@ enjoyed it. If you have feedback or want to contribute yourself, please head to the
 222 //@ [Rust-101](https://www.ralfj.de/projects/rust-101/) website for further information. The entire
 223 //@ course is open-source (under [CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)).
 224 //@
 225 //@ If you want to do more, the examples you saw in this course provide lots of playground for
 226 //@ coming up with your own little extensions here and there. The [index](main.html) contains some
 227 //@ more links to additional resources you may find useful.
 228 //@ With that, there's only one thing left to say: Happy Rust Hacking!
 229
 230 //@ [index](main.html) | [previous](part15.html) | [raw source](workspace/src/part16.rs) | next