From: Ralf Jung Date: Sat, 18 Jul 2015 21:26:18 +0000 (+0200) Subject: write part 16 X-Git-Url: https://git.ralfj.de/rust-101.git/commitdiff_plain/a2eeb1b93e8f52b2119fb11d56f5ffc764ac747b?ds=inline write part 16 --- diff --git a/solutions/src/list.rs b/solutions/src/list.rs new file mode 100644 index 0000000..180627d --- /dev/null +++ b/solutions/src/list.rs @@ -0,0 +1,204 @@ + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} + +struct Node { + data: T, + next: NodePtr, + prev: NodePtr, +} +type NodePtr = *mut Node; + +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + +impl LinkedList { + pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + pub fn push_back(&mut self, t: T) { + // Create the new node. + let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + self.first = new; + } else { + debug_assert!(!self.first.is_null()); + unsafe { (*self.last).next = new; } + } + // Make this the last node. + self.last = new; + } + + pub fn pop_back(&mut self) -> Option { + if self.last.is_null() { + None + } else { + let last = self.last; + let new_last = unsafe { (*self.last).prev }; + self.last = new_last; + if new_last.is_null() { + // The list is now empty. + self.first = new_last; + } + let last = unsafe { raw_into_box(last) } ; + Some(last.data) + } + } + + pub fn push_front(&mut self, t: T) { + // Create the new node. + let new = Box::new( Node { data: t, next: self.first, prev: ptr::null_mut() } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.first.is_null() { + debug_assert!(self.last.is_null()); + self.last = new; + } + else { + debug_assert!(!self.last.is_null()); + unsafe { (*self.first).prev = new; } + } + // Make this the first node. + self.first = new; + } + + pub fn pop_front(&mut self) -> Option { + if self.first.is_null() { + None + } else { + let first = self.first; + let new_first = unsafe { (*self.first).next }; + self.first = new_first; + if new_first.is_null() { + // The list is now empty. + self.last = new_first; + } + let first = unsafe { raw_into_box(first) } ; + Some(first.data) + } + } + + pub fn for_each(&mut self, mut f: F) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // Iterate over every node, and call `f`. + f(unsafe{ &mut (*cur_ptr).data }); + cur_ptr = unsafe{ (*cur_ptr).next }; + } + } + + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a T>, +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + if self.next.is_null() { + None + } else { + let ret = unsafe{ &mut (*self.next).data }; + self.next = unsafe { (*self.next).next }; + Some(ret) + } + } +} + +impl Drop for LinkedList { + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + use std::cell::Cell; + use super::LinkedList; + + #[test] + fn test_pop_back() { + let mut l: LinkedList = LinkedList::new(); + for i in 0..3 { + l.push_front(-i); + l.push_back(i); + } + + assert_eq!(l.pop_back(), Some(2)); + assert_eq!(l.pop_back(), Some(1)); + assert_eq!(l.pop_back(), Some(0)); + assert_eq!(l.pop_back(), Some(-0)); + assert_eq!(l.pop_back(), Some(-1)); + assert_eq!(l.pop_back(), Some(-2)); + assert_eq!(l.pop_back(), None); + assert_eq!(l.pop_back(), None); + } + + #[test] + fn test_pop_front() { + let mut l: LinkedList = LinkedList::new(); + for i in 0..3 { + l.push_front(-i); + l.push_back(i); + } + + assert_eq!(l.pop_front(), Some(-2)); + assert_eq!(l.pop_front(), Some(-1)); + assert_eq!(l.pop_front(), Some(-0)); + assert_eq!(l.pop_front(), Some(0)); + assert_eq!(l.pop_front(), Some(1)); + assert_eq!(l.pop_front(), Some(2)); + assert_eq!(l.pop_front(), None); + assert_eq!(l.pop_front(), None); + } + + #[derive(Clone)] + struct DropChecker { + count: Rc>, + } + impl Drop for DropChecker { + fn drop(&mut self) { + self.count.set(self.count.get() + 1); + } + } + + #[test] + fn test_drop() { + let count = DropChecker { count: Rc::new(Cell::new(0)) }; + { + let mut l = LinkedList::new(); + for _ in 0..10 { + l.push_back(count.clone()); + l.push_front(count.clone()); + } + } + assert_eq!(count.count.get(), 20); + } +} diff --git a/solutions/src/main.rs b/solutions/src/main.rs index 0242f49..daafe08 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -8,8 +8,9 @@ extern crate docopt; pub mod bigint; pub mod vec; pub mod rgrep; -pub mod counter; pub mod callbacks; +pub mod counter; +pub mod list; pub fn main() { rgrep::main(); diff --git a/src/main.rs b/src/main.rs index 1317856..19abe4e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,6 +104,7 @@ mod part12; mod part13; mod part14; mod part15; +mod part16; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part12.rs b/src/part12.rs index 40d9ad5..d5b7ce3 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -9,6 +9,7 @@ use std::cell::{Cell, RefCell}; //@ (There's not even an automatic derivation happening for the cases where it would be possible.) //@ This restriction propagates up to `Callbacks` itself. What could we do about this? +//@ ## `Rc` //@ The solution is to find some way of cloning `Callbacks` without cloning the environments. This can be achieved with //@ `Rc`, a *reference-counted* pointer. This is is another example of a smart pointer. You can `clone` an `Rc` as often //@ as you want, that doesn't affect the data it contains. It only creates more references to the same data. Once all the diff --git a/src/part15.rs b/src/part15.rs index 9ae5aaf..ef2564a 100644 --- a/src/part15.rs +++ b/src/part15.rs @@ -10,6 +10,7 @@ use std::thread; //@ there are no data races. In Rust, shared-memory concurrency is obtained through *interior mutability*, //@ which we already discussed in a single-threaded context in part 12. //@ +//@ ## `Mutex` //@ The most basic type for interior mutability that supports concurrency is [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). //@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to specify //@ the type of the data that's protected by the mutex, and Rust ensures that the data is *only* accessed @@ -111,7 +112,7 @@ pub fn main() { // **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -//@ ## Sync +//@ ## `Sync` //@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` is not prepared for //@ multiple threads trying to access the data at the same time. How does Rust make sure that we don't accidentally use //@ `RefCell` across multiple threads? diff --git a/src/part16.rs b/src/part16.rs new file mode 100644 index 0000000..a613430 --- /dev/null +++ b/src/part16.rs @@ -0,0 +1,196 @@ +// Rust-101, Part 16: Unsafe, Drop (WIP) +// =============================== + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + +//@ As we saw, the rules Rust imposes can get us pretty far: A surprising amount of programming patterns +//@ can be written within safe Rust, and, more importantly, library code like iterators or threads can make +//@ use of the type system to ensure some level of correctness beyond basic memory safety. +//@ +//@ However, there will still be programs that one cannot write in accordance with the borrow checker. And there +//@ will be cases where it may be possible to satisfy the compiler, but only at the cost of some run-time overhead, +//@ as we saw with `RefCell` - overhead which may not be acceptable. In such a situation, it is possible to +//@ use *unsafe* Rust: That's a part of the language that is *known* to open the gate to invalid pointer access +//@ and all other sorts of memory safety. It is typically disabled, guarded by the keyword `unsafe`. Of course, +//@ `unsafe` also means "Here Be Dragons": You are on your own now. Types like `Rc` and `Vec` are implemented +//@ `using unsafe Rust. +//@ +//@ ## Unsafe Code +//@ As an example, let us write a doubly-linked list. Clearly, such a data-structure involves aliasing and mutation: +//@ Every node in the list is pointed to by its left and right neighbor, but still we will want to modify the nodes +//@ (either to change the value at that place, or to insert new nodes). We could now try some clever combination of +//@ `Rc` and `RefCell`, but this would end up being quite annoying - and it would incur some over-head. For a low-level +//@ data-structure like a doubly-linked list, it makes sense to implement an efficient version *once*, that is unsafe +//@ internally, but taht can be used without any risk by safe client code. + +//@ As usually, we start by defining the types. Everything is parameterized by the type `T` of the data stored in the list. +// A node of the list consists of the data, and two node pointers for the predecessor and successor. +struct Node { + next: NodePtr, + prev: NodePtr, + data: T, +} +// A node pointer is a *mutable raw point* to a node. +//@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike borrows, they do not come with +//@ any guarantees: Raw pointers can be null, or they can point to garbage. They don't have a lifetime. +type NodePtr = *mut Node; + +// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type +// will own data of type `T`. +//@ The type `PhantomData` does not actually store anything in memory - it has size zero. However, logically, +//@ Rust will consider a `T` to be present. In this case, Rust knows that data of type `T` may be dropped +//@ whenever a `LinkedList` is dropped. The checks involving destructors are pretty subtle, so it's always +//@ a good idea to provide such extra information. In safe Rust, this can all be done automatically, but here, +//@ we just have a `*mut Node`, which Rust does not consider as actually owning the data it points to. +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + +//@ Before we get to the actual linked-list methods, we write two short helper functions converting between +//@ mutable raw pointers, and owned pointers (aka `Box`). Both employ `mem::transmute`, which is Rust's +//@ `reinterpret_cast`: It can convert anything to anything, by just re-interpreting the bytes. Clearly, +//@ that's an unsafe operation. + +//@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function is not generally safe. +//@ The caller will have to ensure that `r` is a valid pointer, and that nobody else has a pointer to this data. +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} +//@ The case is slightly different for `box_into_raw`: Converting a `Box` to a raw pointer is always safe. I just drops some +//@ information. Hence we keep the function itself safe, and use an *unsafe block* within the function. This is an (unchecked) +//@ promise to the Rust compiler, saying that even though the code inside that block *could* go wrong, we actually know that +//@ it will not. +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} + +impl LinkedList { + // A new linked list just contains null pointers. `PhantomData` is how we construct any `PhantomData`. + pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + // Add a new node to the end of the list. + pub fn push_back(&mut self, t: T) { + // Create the new node, and make it a raw pointer. + //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want the + //@ memory that it points to to be deallocated! + let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + // The list is currently empty, so we have to update the head pointer. + self.first = new; /*@*/ + } else { + debug_assert!(!self.first.is_null()); + // We have to update the `next` pointer of the tail node. + //@ Since Rust does not know that a raw pointer actually to anything, dereferencing such a pointer is + //@ an unsafe operation. So this unsafe block promises that the pointer will actually be valid. + unsafe { (*self.last).next = new; } /*@*/ + } + // Make this the last node. + self.last = new; + } + + // **Exercise 16.1**: Add some more operations to `LinkedList`: `pop_back`, `push_front` and `pop_front`. + // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` + // and return `Option`. + + // Of course, we will also want to provide an iterator. + //@ This function just creates an instance of `IterMut`, the iterator type which does the actual work. + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +//@ What does the iterator need to store? Strictly speaking, all it needs is the pointer to the next node +//@ that it is going to visit. However, how do we make sure that this pointer remains valid? We have to +//@ get this right ourselves, as we left the safe realms of borrowing and ownership. Remember that the +//@ key ingredient for iterator safety was to tie the lifetime of the iterator to the lifetime of the +//@ borrow used for `iter_mut`. We will thus give `IterMut` two parameters: A type parameter specifying +//@ the type of the data, and a lifetime parameter specifying for how long the list was borrowed to the +//@ iterator. +//@ +//@ For Rust to accept the type, we have to add two more annotations. First of all, we have to ensure that +//@ the data in the list lives at least as long as the iterator: If you drop the `T: 'a`, Rust will tell +//@ you to add it back. And secondly, Rust will complain if `'a` is not actually used in the struct. +//@ It doesn't know what it is supposed to do with that lifetime. So we use `PhantomData` again to tell +//@ it that in terms of ownership, this type actually (mutably) borrows a linked list. This has no +//@ operational effect, but it means that Rust can deduce the intent we had when adding that +//@ seemingly useless lifetime parameter. +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a mut LinkedList>, +} + +//@ When implementing `Iterator` for `IterMut`, the fact that we have the lifetime `'a` around immediately +//@ pays of: We would not even be able to write down the type `Item` without that lifetime. +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + // The actual iteration is straight-forward: Once we reached a null pointer, we are done. + if self.next.is_null() { + None + } else { + // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data + // and update the iterator. + let next = unsafe { &mut *self.next }; + let ret = &mut next.data; + self.next = next.next; + Some(ret) + } + } +} + +//@ In `next` above, we made crucial use of the assumption that `self.next` is either null or a valid pointer. +//@ This only works because if someone tries to delete elements from a list during iteration, we know that the borrow checker +//@ will catch them: If they call `next`, the lifetime `'a` we artificially added to the iterator has to still be +//@ active, which means the mutable borrow passed to `iter_mut` is still active, which means nobody can delete +//@ anything from the list. In other words, we make use of the expressive type system of Rust, decorating +//@ our own unsafe implementation with just enough information so that Rust can check *uses* of the linked-list. +//@ If the type system were weaker, we could not write a linked-list like the above with a safe interface! + +// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared borrows. +// Add testcases for both kinds of iterators. + +// ## `Drop` +//@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is removed, +//@ nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself has a destructor that needs to +//@ clean up, it is not called for the element remaining in the list. We need to take care of that ourselves. +//@ +//@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a very special trait. +//@ It can only be implemented for *nominal types*, i.e., you cannot implement `Drop` for `&mut T`. You also cannot +//@ restrict the type and lifetime parameters further - the `Drop` implementation has to apply to *all* instances +//@ of `LinkedList`. +impl Drop for LinkedList { + // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then + // the destructor of `self` would be called at the end pf the function, resulting in endless recursion... + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // In the destructor, we just iterate over the entire list, successively obtaining ownership + // (`Box`) of every node. When the box is dropped, it will call the destructor on `data` if + // necessary, and subsequently free the node on the heap. + //@ We call `drop` explicitly here just for documentation purposes. + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + +//@ ## The End +//@ Congratulations! You complete Rust-101. This was the last example of the last part. I hope you enjoyed it. +//@ If you have feedback, please head to the [Rust-101](https://www.ralfj.de/projects/rust-101/) website +//@ and let me know how you liked it. The entire course is open-source (under CC-BY-SA 4.0), and contributions are welcome! +//@ +//@ The [index](main.html) contains some more links to additional resources you may find useful. With that, there's +//@ only one thing left to say: Happy Rust Hacking! + +//@ [index](main.html) | [previous](part15.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index a86a65a..77632d9 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -17,6 +17,7 @@ mod part12; mod part13; mod part14; mod part15; +mod part16; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part16.rs b/workspace/src/part16.rs new file mode 100644 index 0000000..a804f8e --- /dev/null +++ b/workspace/src/part16.rs @@ -0,0 +1,113 @@ +// Rust-101, Part 16: Unsafe, Drop (WIP) +// =============================== + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + + +// A node of the list consists of the data, and two node pointers for the predecessor and successor. +struct Node { + next: NodePtr, + prev: NodePtr, + data: T, +} +// A node pointer is a *mutable raw point* to a node. +type NodePtr = *mut Node; + +// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type +// will own data of type `T`. +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + + +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} + +impl LinkedList { + // A new linked list just contains null pointers. `PhantomData` is how we construct any `PhantomData`. + pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + // Add a new node to the end of the list. + pub fn push_back(&mut self, t: T) { + // Create the new node, and make it a raw pointer. + let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + // The list is currently empty, so we have to update the head pointer. + unimplemented!() + } else { + debug_assert!(!self.first.is_null()); + // We have to update the `next` pointer of the tail node. + unimplemented!() + } + // Make this the last node. + self.last = new; + } + + // **Exercise 16.1**: Add some more operations to `LinkedList`: `pop_back`, `push_front` and `pop_front`. + // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` + // and return `Option`. + + // Of course, we will also want to provide an iterator. + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a mut LinkedList>, +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + // The actual iteration is straight-forward: Once we reached a null pointer, we are done. + if self.next.is_null() { + None + } else { + // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data + // and update the iterator. + let next = unsafe { &mut *self.next }; + let ret = &mut next.data; + self.next = next.next; + Some(ret) + } + } +} + + +// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared borrows. +// Add testcases for both kinds of iterators. + +// ## `Drop` +impl Drop for LinkedList { + // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then + // the destructor of `self` would be called at the end pf the function, resulting in endless recursion... + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // In the destructor, we just iterate over the entire list, successively obtaining ownership + // (`Box`) of every node. When the box is dropped, it will call the destructor on `data` if + // necessary, and subsequently free the node on the heap. + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + +