From: Ralf Jung Date: Sun, 11 Feb 2018 16:50:59 +0000 (+0100) Subject: Merge pull request #29 from teknico/shorten-lines X-Git-Url: https://git.ralfj.de/rust-101.git/commitdiff_plain/801f2b59728fba1e13d3e34a08457b812f8c0f56?hp=9f9b301fd5e86ae4b8cf743f80a129e4addb3635 Merge pull request #29 from teknico/shorten-lines Shorten lines --- diff --git a/src/main.rs b/src/main.rs index 835a150..2a0aa76 100644 --- a/src/main.rs +++ b/src/main.rs @@ -43,12 +43,15 @@ // This will also install `cargo`, the tool responsible for building rust projects (or *crates*). // // Next, we have to prepare a workspace for you to conduct your Rust-101 work in, so that you don't -// have to start with an empty file. The easiest way is to [download the workspace](https://www.ralfj.de/projects/rust-101/workspace.zip) -// matching the online tutorial. Try `cargo build` in that new folder to check that compiling your workspace succeeds. -// (You can also execute it with `cargo run`, but you'll need to do some work before this does anything useful.) -// -// Alternatively, you can build the workspace from source by fetching the [git repository](https://www.ralfj.de/git/rust-101.git) -// and running `make workspace`. +// have to start with an empty file. The easiest way is to +// [download the workspace](https://www.ralfj.de/projects/rust-101/workspace.zip) +// matching the online tutorial. Try `cargo build` in that new folder to check that compiling your +// workspace succeeds. +// (You can also execute it with `cargo run`, but you'll need to do some work before this does +// anything useful.) +// +// Alternatively, you can build the workspace from source by fetching the +// [git repository](https://www.ralfj.de/git/rust-101.git) and running `make workspace`. // Course Content // -------------- @@ -101,8 +104,8 @@ mod part14; mod part15; mod part16; -// To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` -// function. +// To actually run the code of some part (after filling in the blanks, if necessary), simply edit +// the `main` function. fn main() { part00::main(); } @@ -118,5 +121,7 @@ fn main() { // * [The Rustonomicon](https://doc.rust-lang.org/nightly/nomicon/) // * [Rust by Example](http://rustbyexample.com/) // * The [Rust Subreddit](https://www.reddit.com/r/rust/) -// * A [collection of links](https://github.com/ctjhoa/rust-learning) to blog posts, articles, videos, etc. for learning Rust. -// * For the IRC channel and other forums, see the "Community" section of the [Rust Documentation index](https://doc.rust-lang.org/index.html) +// * A [collection of links](https://github.com/ctjhoa/rust-learning) to blog posts, articles, +// videos, etc. for learning Rust. +// * For the IRC channel and other forums, see the "Community" section of the +// [Rust Documentation index](https://doc.rust-lang.org/index.html) diff --git a/src/part00.rs b/src/part00.rs index d126a17..d8b4ed7 100644 --- a/src/part00.rs +++ b/src/part00.rs @@ -6,10 +6,10 @@ //@ ## Getting started //@ Let us start by thinking about the *type* of our function. Rust forces us to give the types of -//@ all arguments, and the return type, before we even start writing the body. In the case of our minimum -//@ function, we may be inclined to say that it returns a number. But then we would be in trouble: What's -//@ the minimum of an empty list? The type of the function says we have to return *something*. -//@ We could just choose 0, but that would be kind of arbitrary. 
What we need +//@ all arguments, and the return type, before we even start writing the body. In the case of our +//@ minimum function, we may be inclined to say that it returns a number. But then we would be in +//@ trouble: What's the minimum of an empty list? The type of the function says we have to return +//@ *something*. We could just choose 0, but that would be kind of arbitrary. What we need //@ is a type that is "a number, or nothing". Such a type (of multiple exclusive options) //@ is called an "algebraic datatype", and Rust lets us define such types with the keyword `enum`. //@ Coming from C(++), you can think of such a type as a `union`, together with a field that @@ -37,22 +37,24 @@ fn vec_min(vec: Vec) -> NumberOrNothing { // Now we want to *iterate* over the list. Rust has some nice syntax for iterators: for el in vec { - // So `el` is an element of the list. We need to update `min` accordingly, but how do we get the current - // number in there? This is what pattern matching can do: + // So `el` is an element of the list. We need to update `min` accordingly, but how do we + // get the current number in there? This is what pattern matching can do: match min { - // In this case (*arm*) of the `match`, `min` is currently nothing, so let's just make it the number `el`. + // In this case (*arm*) of the `match`, `min` is currently nothing, so let's just make + // it the number `el`. NumberOrNothing::Nothing => { min = NumberOrNothing::Number(el); /*@*/ }, - // In this arm, `min` is currently the number `n`, so let's compute the new minimum and store it. + // In this arm, `min` is currently the number `n`, so let's compute the new minimum and + // store it. //@ We will write the function `min_i32` just after we completed this one. NumberOrNothing::Number(n) => { let new_min = min_i32(n, el); /*@*/ min = NumberOrNothing::Number(new_min); /*@*/ } } - //@ Notice that Rust makes sure you did not forget to handle any case in your `match`. We say - //@ that the pattern matching has to be *exhaustive*. + //@ Notice that Rust makes sure you did not forget to handle any case in your `match`. We + //@ say that the pattern matching has to be *exhaustive*. } // Finally, we return the result of the computation. return min; diff --git a/src/part01.rs b/src/part01.rs index 95e4593..a4537ac 100644 --- a/src/part01.rs +++ b/src/part01.rs @@ -43,9 +43,10 @@ fn compute_stuff(x: i32) -> i32 { // Let us now refactor `vec_min`. fn vec_min(v: Vec) -> NumberOrNothing { - //@ Remember that helper function `min_i32`? Rust allows us to define such helper functions *inside* other - //@ functions. This is just a matter of namespacing, the inner function has no access to the data of the outer - //@ one. Still, being able to nicely group functions can significantly increase readability. + //@ Remember that helper function `min_i32`? Rust allows us to define such helper functions + //@ *inside* other functions. This is just a matter of namespacing, the inner function has no + //@ access to the data of the outer one. Still, being able to nicely group functions can + //@ significantly increase readability. fn min_i32(a: i32, b: i32) -> i32 { if a < b { a } else { b } /*@*/ } @@ -106,4 +107,5 @@ pub fn main() { // **Exercise 01.2**: Write a function `vec_print` that takes a vector and prints all its elements. 
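// A purely illustrative sketch (made-up names, not part of this patch): a helper
// function nested inside another function, as described above, together with a loop
// that prints every element of a vector.
fn print_doubled(v: Vec<i32>) {
    // The nested function is only visible inside `print_doubled`; it has no access
    // to the outer function's local variables.
    fn double(x: i32) -> i32 { x * 2 }

    for el in v {
        println!("{}", double(el));
    }
}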
-//@ [index](main.html) | [previous](part00.html) | [raw source](workspace/src/part01.rs) | [next](part02.html) +//@ [index](main.html) | [previous](part00.html) | [raw source](workspace/src/part01.rs) | +//@ [next](part02.html) diff --git a/src/part02.rs b/src/part02.rs index 6a01087..a97c367 100644 --- a/src/part02.rs +++ b/src/part02.rs @@ -3,7 +3,8 @@ //@ Let us for a moment reconsider the type `NumberOrNothing`. Isn't it a bit annoying that we //@ had to hard-code the type `i32` in there? What if tomorrow, we want a `CharOrNothing`, and -//@ later a `FloatOrNothing`? Certainly we don't want to re-write the type and all its inherent methods. +//@ later a `FloatOrNothing`? Certainly we don't want to re-write the type and all its inherent +//@ methods. // ## Generic datatypes @@ -20,18 +21,20 @@ pub use self::SomethingOrNothing::*; //@ What this does is define an entire family of types: We can now write //@ `SomethingOrNothing` to get back our `NumberOrNothing`. type NumberOrNothing = SomethingOrNothing; -//@ However, we can also write `SomethingOrNothing` or even `SomethingOrNothing>`. -//@ In fact, a type like `SomethingOrNothing` is so useful that it is already present in the standard library: It's called an -//@ *option type*, written `Option`. Go check out its [documentation](https://doc.rust-lang.org/stable/std/option/index.html)! -//@ (And don't worry, there's indeed lots of material mentioned there that we have not covered yet.) +//@ However, we can also write `SomethingOrNothing` or even +//@ `SomethingOrNothing>`. In fact, a type like `SomethingOrNothing` is so +//@ useful that it is already present in the standard library: It's called an *option type*, +//@ written `Option`. Go check out its +//@ [documentation](https://doc.rust-lang.org/stable/std/option/index.html)! (And don't worry, +//@ there's indeed lots of material mentioned there that we have not covered yet.) // ## Generic `impl`, Static functions -//@ The types are so similar, that we can provide a generic function to construct a `SomethingOrNothing` -//@ from an `Option`, and vice versa. +//@ The types are so similar, that we can provide a generic function to construct a +//@ `SomethingOrNothing` from an `Option`, and vice versa. //@ //@ Notice the syntax for giving generic implementations to generic types: Think of the first `` -//@ as *declaring* a type variable ("I am doing something for all types `T`"), and the second `` as -//@ *using* that variable ("The thing I do, is implement `SomethingOrNothing`"). +//@ as *declaring* a type variable ("I am doing something for all types `T`"), and the second `` +//@ as *using* that variable ("The thing I do, is implement `SomethingOrNothing`"). //@ // Inside an `impl`, `Self` refers to the type we are implementing things for. Here, it is // an alias for `SomethingOrNothing`. @@ -98,13 +101,14 @@ pub fn vec_min(v: Vec) -> SomethingOrNothing { //@ Before going on, take a moment to ponder the flexibility of Rust's take on abstraction: //@ We just defined our own, custom trait (interface), and then implemented that trait //@ *for an existing type*. With the hierarchical approach of, e.g., C++ or Java, -//@ that's not possible: We cannot make an existing type also inherit from our abstract base class after the fact. +//@ that's not possible: We cannot make an existing type also inherit from our abstract base class +//@ after the fact. 
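// To make the "implement a trait for an existing type" point concrete, here is a
// minimal made-up sketch (the trait name `Describe` is an assumption for illustration):
trait Describe {
    fn describe(&self) -> String;
}

// No inheritance hierarchy is involved: the interface is attached to the existing
// type `i32` after the fact.
impl Describe for i32 {
    fn describe(&self) -> String {
        format!("the 32-bit integer {}", self)
    }
}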
//@ //@ In case you are worried about performance, note that Rust performs *monomorphisation* //@ of generic functions: When you call `vec_min` with `T` being `i32`, Rust essentially goes //@ ahead and creates a copy of the function for this particular type, filling in all the blanks. -//@ In this case, the call to `T::min` will become a call to our implementation *statically*. There is -//@ no dynamic dispatch, like there would be for Java interface methods or C++ `virtual` methods. +//@ In this case, the call to `T::min` will become a call to our implementation *statically*. There +//@ is no dynamic dispatch, like there would be for Java interface methods or C++ `virtual` methods. //@ This behavior is similar to C++ templates. The optimizer (Rust is using LLVM) then has all the //@ information it could want to, e.g., inline function calls. @@ -117,9 +121,9 @@ impl Minimum for i32 { } // We again provide a `print` function. -//@ This also shows that we can have multiple `impl` blocks -//@ for the same type (remember that `NumberOrNothing` is just a type alias for `SomethingOrNothing`), -//@ and we can provide some methods only for certain instances of a generic type. +//@ This also shows that we can have multiple `impl` blocks for the same type (remember that +//@ `NumberOrNothing` is just a type alias for `SomethingOrNothing`), and we can provide some +//@ methods only for certain instances of a generic type. impl NumberOrNothing { pub fn print(self) { match self { @@ -143,7 +147,9 @@ pub fn main() { //@ If this printed `3`, then your generic `vec_min` is working! So get ready for the next part. -// **Exercise 02.1**: Change your program such that it computes the minimum of a `Vec` (where `f32` is the type -// of 32-bit floating-point numbers). You should not change `vec_min` in any way, obviously! +// **Exercise 02.1**: Change your program such that it computes the minimum of a `Vec` (where +// `f32` is the type // of 32-bit floating-point numbers). You should not change `vec_min` in any +// way, obviously! -//@ [index](main.html) | [previous](part01.html) | [raw source](workspace/src/part02.rs) | [next](part03.html) +//@ [index](main.html) | [previous](part01.html) | [raw source](workspace/src/part02.rs) | +//@ [next](part03.html) diff --git a/src/part03.rs b/src/part03.rs index 81f8714..44431e6 100644 --- a/src/part03.rs +++ b/src/part03.rs @@ -29,41 +29,46 @@ fn read_vec() -> Vec { //@ for that, but there is a catch: What happens if there is some other piece of code running //@ concurrently, that also reads from standard input? The result would be a mess. Hence //@ Rust requires us to `lock` standard input if we want to perform large operations on - //@ it. (See [the documentation](https://doc.rust-lang.org/stable/std/io/struct.Stdin.html) for more - //@ details.) + //@ it. (See [the documentation](https://doc.rust-lang.org/stable/std/io/struct.Stdin.html) for + //@ more details.) for line in stdin.lock().lines() { // Rust's type for (dynamic, growable) strings is `String`. However, our variable `line` // here is not yet of that type: It has type `io::Result`. - //@ The problem with I/O is that it can always go wrong. The type of `line` is a lot like `Option` ("a `String` or - //@ nothing"), but in the case of "nothing", there is additional information about the error. - //@ Again, I recommend to check [the documentation](https://doc.rust-lang.org/stable/std/io/type.Result.html). 
- //@ You will see that `io::Result` is actually just an alias for `Result`, so click on that to obtain + //@ The problem with I/O is that it can always go wrong. The type of `line` is a lot like + //@ `Option` ("a `String` or nothing"), but in the case of "nothing", there is + //@ additional information about the error. Again, I recommend to check + //@ [the documentation](https://doc.rust-lang.org/stable/std/io/type.Result.html). You will + //@ see that `io::Result` is actually just an alias for `Result`, so click on that to obtain //@ the list of all constructors and methods of the type. - //@ We will be lazy here and just assume that nothing goes wrong: `unwrap` returns the `String` if there is one, - //@ and panics the program otherwise. Since a `Result` carries some details about the error that occurred, - //@ there will be a somewhat reasonable error message. Still, you would not want a user to see such - //@ an error, so in a "real" program, we would have to do proper error handling. + //@ We will be lazy here and just assume that nothing goes wrong: `unwrap` returns the + //@ `String` if there is one, and panics the program otherwise. Since a `Result` carries + //@ some details about the error that occurred, there will be a somewhat reasonable error + //@ message. Still, you would not want a user to see such an error, so in a "real" program, + //@ we would have to do proper error handling. //@ Can you find the documentation of `Result::unwrap`? //@ - // I chose the same name (`line`) for the new variable to ensure that I will never, accidentally, - // access the "old" `line` again. + // I chose the same name (`line`) for the new variable to ensure that I will never, + // accidentally, access the "old" `line` again. let line = line.unwrap(); // Now that we have our `String`, we want to make it an `i32`. //@ We first `trim` the `line` to remove leading and trailing whitespace. - //@ `parse` is a method on `String` that can convert a string to anything. Try finding its documentation! + //@ `parse` is a method on `String` that can convert a string to anything. Try finding its + //@ documentation! - //@ In this case, Rust *could* figure out automatically that we need an `i32` (because of the return type - //@ of the function), but that's a bit too much magic for my taste. We are being more explicit here: - //@ `parse::` is `parse` with its generic type set to `i32`. + //@ In this case, Rust *could* figure out automatically that we need an `i32` (because of + //@ the return type of the function), but that's a bit too much magic for my taste. We are + //@ being more explicit here: `parse::` is `parse` with its generic type set to `i32`. match line.trim().parse::() { - //@ `parse` returns again a `Result`, and this time we use a `match` to handle errors (like, the user entering - //@ something that is not a number). - //@ This is a common pattern in Rust: Operations that could go wrong will return `Option` or `Result`. - //@ The only way to get to the value we are interested in is through pattern matching (and through helper functions - //@ like `unwrap`). If we call a function that returns a `Result`, and throw the return value away, - //@ the compiler will emit a warning. It is hence impossible for us to *forget* handling an error, - //@ or to accidentally use a value that doesn't make any sense because there was an error producing it. + //@ `parse` returns again a `Result`, and this time we use a `match` to handle errors + //@ (like, the user entering something that is not a number). 
+ //@ This is a common pattern in Rust: Operations that could go wrong will return + //@ `Option` or `Result`. The only way to get to the value we are interested in is + //@ through pattern matching (and through helper functions like `unwrap`). If we call + //@ a function that returns a `Result`, and throw the return value away, the compiler + //@ will emit a warning. It is hence impossible for us to *forget* handling an error, + //@ or to accidentally use a value that doesn't make any sense because there was an + //@ error producing it. Ok(num) => { vec.push(num) /*@*/ }, @@ -77,9 +82,9 @@ fn read_vec() -> Vec { vec } -//@ So much for `read_vec`. If there are any questions left, the documentation of the respective function -//@ should be very helpful. Try finding the one for `Vec::push`. I will not always provide the links, -//@ as the documentation is quite easy to navigate and you should get used to that. +//@ So much for `read_vec`. If there are any questions left, the documentation of the respective +//@ function should be very helpful. Try finding the one for `Vec::push`. I will not always provide +//@ the links, as the documentation is quite easy to navigate and you should get used to that. // For the rest of the code, we just re-use part 02 by importing it with `use`. //@ I already sneaked a bunch of `pub` in part 02 to make this possible: Only @@ -94,14 +99,16 @@ pub fn main() { min.print(); /*@*/ } -// **Exercise 03.1**: Define a trait `Print` to write a generic version of `SomethingOrNothing::print`. +// **Exercise 03.1**: Define a trait `Print` to write a generic version of +// `SomethingOrNothing::print`. // Implement that trait for `i32`, and change the code above to use it. // I will again provide a skeleton for this solution. It also shows how to attach bounds to generic // implementations (just compare it to the `impl` block from the previous exercise). // You can read this as "For all types `T` satisfying the `Print` trait, I provide an implementation // for `SomethingOrNothing`". // -// Notice that I called the function on `SomethingOrNothing` `print2` to disambiguate from the `print` defined previously. +// Notice that I called the function on `SomethingOrNothing` `print2` to disambiguate from the +// `print` defined previously. // // *Hint*: There is a macro `print!` for printing without appending a newline. pub trait Print { @@ -113,7 +120,8 @@ impl SomethingOrNothing { } } -// **Exercise 03.2**: Building on exercise 02.2, implement all the things you need on `f32` to make your -// program work with floating-point numbers. +// **Exercise 03.2**: Building on exercise 02.2, implement all the things you need on `f32` to make +// your program work with floating-point numbers. -//@ [index](main.html) | [previous](part02.html) | [raw source](workspace/src/part03.rs) | [next](part04.html) +//@ [index](main.html) | [previous](part02.html) | [raw source](workspace/src/part03.rs) | +//@ [next](part04.html) diff --git a/src/part04.rs b/src/part04.rs index 83eb87d..21654cd 100644 --- a/src/part04.rs +++ b/src/part04.rs @@ -37,9 +37,9 @@ fn ownership_demo() { //@ //@ If you give a book to your friend, you cannot just come to his place next day and get the book! //@ It's no longer yours. Rust makes sure you don't break this rule. Try enabling the commented -//@ line in `ownership_demo`. Rust will tell you that `v` has been *moved*, which is to say that ownership -//@ has been transferred somewhere else. 
In this particular case, the buffer storing the data -//@ does not even exist anymore, so we are lucky that Rust caught this problem! +//@ line in `ownership_demo`. Rust will tell you that `v` has been *moved*, which is to say that +//@ ownership has been transferred somewhere else. In this particular case, the buffer storing the +//@ data does not even exist anymore, so we are lucky that Rust caught this problem! //@ Essentially, ownership rules out aliasing, hence making the kind of problem discussed above //@ impossible. @@ -49,17 +49,17 @@ fn ownership_demo() { //@ vector. Hence, when `vec_min` finishes, the entire vector is deleted. That's of course not what //@ we wanted! Can't we somehow give `vec_min` access to the vector, while retaining ownership of it? //@ -//@ Rust calls this *a reference* to the vector, and it considers references as *borrowing* ownership. This -//@ works a bit like borrowing does in the real world: If your friend borrows a book from you, your friend -//@ can have it and work on it (and you can't!) as long as the book is still borrowed. Your friend could -//@ even lend the book to someone else. Eventually however, your friend has to give the book back to you, -//@ at which point you again have full control. +//@ Rust calls this *a reference* to the vector, and it considers references as *borrowing* +//@ ownership. This works a bit like borrowing does in the real world: If your friend borrows a +//@ book from you, your friend can have it and work on it (and you can't!) as long as the book is +//@ still borrowed. Your friend could even lend the book to someone else. Eventually however, your +//@ friend has to give the book back to you, at which point you again have full control. //@ //@ Rust distinguishes between two kinds of references. First of all, there's the *shared* reference. //@ This is where the book metaphor kind of breaks down... you can give a shared reference to //@ *the same data* to lots of different people, who can all access the data. This of course -//@ introduces aliasing, so in order to live up to its promise of safety, Rust generally does not allow -//@ mutation through a shared reference. +//@ introduces aliasing, so in order to live up to its promise of safety, Rust generally does not +//@ allow mutation through a shared reference. //@ So, let's re-write `vec_min` to work on a shared reference to a vector, written `&Vec`. //@ I also took the liberty to convert the function from `SomethingOrNothing` to the standard @@ -68,8 +68,8 @@ fn vec_min(v: &Vec) -> Option { use std::cmp; let mut min = None; - // This time, we explicitly request an iterator for the vector `v`. The method `iter` just borrows the vector - // it works on, and provides shared references to the elements. + // This time, we explicitly request an iterator for the vector `v`. The method `iter` just + // borrows the vector it works on, and provides shared references to the elements. for e in v.iter() { // In the loop, `e` now has type `&i32`, so we have to dereference it to obtain an `i32`. min = Some(match min { @@ -88,9 +88,9 @@ fn shared_ref_demo() { vec_min(&v); println!("The first element is: {}", *first); } -//@ What's going on here? First, `&` is how you lend ownership to someone - this operator creates a shared reference. -//@ `shared_ref_demo` creates three shared references to `v`: -//@ The reference `first` begins in the 2nd line of the function and lasts all the way to the end. The other two +//@ What's going on here? 
First, `&` is how you lend ownership to someone - this operator creates a +//@ shared reference. `shared_ref_demo` creates three shared references to `v`: The reference +//@ `first` begins in the 2nd line of the function and lasts all the way to the end. The other two //@ references, created for calling `vec_min`, only last for the duration of that respective call. //@ //@ Technically, of course, references are pointers. Notice that since `vec_min` only gets a shared @@ -98,13 +98,16 @@ fn shared_ref_demo() { //@ that was created before calling `vec_min` remains valid. // ## Unique, mutable references -//@ There is a second way to borrow something, a second kind of reference: The *mutable reference*. This is a reference that comes with the promise -//@ that nobody else has *any kind of access* to the referee - in contrast to shared references, there is no aliasing with mutable references. It is thus always safe to perform mutation through such a reference. -//@ Because there cannot be another reference to the same data, we could also call it a *unique* reference, but that is not their official name. +//@ There is a second way to borrow something, a second kind of reference: The *mutable reference*. +//@ This is a reference that comes with the promise that nobody else has *any kind of access* to +//@ the referee - in contrast to shared references, there is no aliasing with mutable references. +//@ It is thus always safe to perform mutation through such a reference. Because there cannot be +//@ another reference to the same data, we could also call it a *unique* reference, but that is not +//@ their official name. //@ As an example, consider a function which increments every element of a vector by 1. -//@ The type `&mut Vec` is the type of mutable references to `vec`. Because the reference is -//@ mutable, we can use a mutable iterator, providing mutable references to the elements. +//@ The type `&mut Vec` is the type of mutable references to `vec`. Because the reference +//@ is mutable, we can use a mutable iterator, providing mutable references to the elements. fn vec_inc(v: &mut Vec) { for e in v.iter_mut() { *e += 1; @@ -118,29 +121,36 @@ fn mutable_ref_demo() { vec_inc(&mut v); /* println!("The first element is: {}", *first); */ /* BAD! */ } -//@ `&mut` is the operator to create a mutable reference. We have to mark `v` as mutable in order to create such a -//@ reference: Even though we completely own `v`, Rust tries to protect us from accidentally mutating things. +//@ `&mut` is the operator to create a mutable reference. We have to mark `v` as mutable in order +//@ to create such a reference: Even though we completely own `v`, Rust tries to protect us from +//@ accidentally mutating things. //@ Hence owned variables that you intend to mutate have to be annotated with `mut`. -//@ Because the reference passed to `vec_inc` only lasts as long as the function call, we can still call -//@ `vec_inc` on the same vector twice: The durations of the two references do not overlap, so we never have more -//@ than one mutable reference - we only ever borrow `v` once at a time. However, we can *not* create a shared reference that spans a call to `vec_inc`. Just try +//@ Because the reference passed to `vec_inc` only lasts as long as the function call, we can still +//@ call `vec_inc` on the same vector twice: The durations of the two references do not overlap, so +//@ we never have more than one mutable reference - we only ever borrow `v` once at a time. 
+//@ However, we can *not* create a shared reference that spans a call to `vec_inc`. Just try //@ enabling the commented-out lines, and watch Rust complain. This is because `vec_inc` could mutate //@ the vector structurally (i.e., it could add or remove elements), and hence the reference `first` -//@ could become invalid. In other words, Rust keeps us safe from bugs like the one in the C++ snippet above. +//@ could become invalid. In other words, Rust keeps us safe from bugs like the one in the C++ +//@ snippet above. //@ -//@ Above, I said that having a mutable reference excludes aliasing. But if you look at the code above carefully, -//@ you may say: "Wait! Don't the `v` in `mutable_ref_demo` and the `v` in `vec_inc` alias?" And you are right, -//@ they do. However, the `v` in `mutable_ref_demo` is not actually usable, it is not *active*: As long as `v` is -//@ borrowed, Rust will not allow you to do anything with it. +//@ Above, I said that having a mutable reference excludes aliasing. But if you look at the code +//@ above carefully, you may say: "Wait! Don't the `v` in `mutable_ref_demo` and the `v` in +//@ `vec_inc` alias?" And you are right, they do. However, the `v` in `mutable_ref_demo` is not +//@ actually usable, it is not *active*: As long as `v` is borrowed, Rust will not allow you to do +//@ anything with it. // ## Summary // The ownership and borrowing system of Rust enforces the following three rules: // // * There is always exactly one owner of a piece of data // * If there is an active mutable reference, then nobody else can have active access to the data -// * If there is an active shared reference, then every other active access to the data is also a shared reference +// * If there is an active shared reference, then every other active access to the data is also a +// shared reference // -// As it turns out, combined with the abstraction facilities of Rust, this is a very powerful mechanism -// to tackle many problems beyond basic memory safety. You will see some examples for this soon. +// As it turns out, combined with the abstraction facilities of Rust, this is a very powerful +// mechanism to tackle many problems beyond basic memory safety. You will see some examples for +// this soon. -//@ [index](main.html) | [previous](part03.html) | [raw source](workspace/src/part04.rs) | [next](part05.html) +//@ [index](main.html) | [previous](part03.html) | [raw source](workspace/src/part04.rs) | +//@ [next](part05.html) diff --git a/src/part05.rs b/src/part05.rs index 49e57db..7ad8754 100644 --- a/src/part05.rs +++ b/src/part05.rs @@ -2,24 +2,26 @@ // ======================== // ## Big Numbers -//@ In the course of the next few parts, we are going to build a data-structure for computations with -//@ *big* numbers. We would like to not have an upper bound to how large these numbers can get, with -//@ the memory of the machine being the only limit. -//@ -//@ We start by deciding how to represent such big numbers. One possibility here is -//@ to use a vector "digits" of the number. This is like "1337" being a vector of four digits (1, 3, 3, 7), -//@ except that we will use `u64` as type of our digits, meaning we have 2^64 individual digits. Now we just -//@ have to decide the order in which we store numbers. I decided that we will store the least significant -//@ digit first. This means that "1337" would actually become (7, 3, 3, 1).
+ +//@ In the course of the next few parts, we are going to build a data-structure for computations +//@ with *big* numbers. We would like to not have an upper bound to how large these numbers can +//@ get, with the memory of the machine being the only limit. +//@ +//@ We start by deciding how to represent such big numbers. One possibility here is to use a vector +//@ "digits" of the number. This is like "1337" being a vector of four digits (1, 3, 3, 7), except +//@ that we will use `u64` as type of our digits, meaning we have 2^64 individual digits. Now we +//@ just have to decide the order in which we store numbers. I decided that we will store the least +//@ significant digit first. This means that "1337" would actually become (7, 3, 3, 1).
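// A small illustrative sketch (not from the tutorial sources) of the
// least-significant-digit-first convention, using base 10 instead of base 2^64 so the
// numbers stay readable:
fn value_base10(digits: &Vec<u64>) -> u64 {
    let mut value = 0;
    let mut factor = 1;
    for d in digits {
        value += *d * factor;
        factor *= 10;
    }
    value
}
// value_base10(&vec![7, 3, 3, 1]) == 1337, matching the "1337" example above.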
//@ Finally, we declare that there must not be any trailing zeros (corresponding to //@ useless leading zeros in our usual way of writing numbers). This is to ensure that //@ the same number can only be stored in one way. //@ To write this down in Rust, we use a `struct`, which is a lot like structs in C: -//@ Just a bunch of named fields. Every field can be private to the current module (which is the default), -//@ or public (which is indicated by a `pub` in front of the name). For the sake of the tutorial, we make -//@ `data` public - otherwise, the next parts of this course could not work on `BigInt`s. Of course, in a -//@ real program, one would make the field private to ensure that the invariant (no trailing zeros) is maintained. +//@ Just a bunch of named fields. Every field can be private to the current module (which is the +//@ default), or public (which is indicated by a `pub` in front of the name). For the sake of the +//@ tutorial, we make `data` public - otherwise, the next parts of this course could not work on +//@ `BigInt`s. Of course, in a real program, one would make the field private to ensure that the +//@ invariant (no trailing zeros) is maintained. pub struct BigInt { pub data: Vec, // least significant digit first, no trailing zeros } @@ -47,11 +49,11 @@ impl BigInt { } } - // We can convert any little-endian vector of digits (i.e., least-significant digit first) into a number, - // by removing trailing zeros. The `mut` declaration for `v` here is just like the one in `let mut ...`: - // We completely own `v`, but Rust still asks us to make our intention of modifying it explicit. This - // `mut` is *not* part of the type of `from_vec` - the caller has to give up ownership of `v` anyway, so - // they don't care anymore what you do to it. + // We can convert any little-endian vector of digits (i.e., least-significant digit first) into + // a number, by removing trailing zeros. The `mut` declaration for `v` here is just like the + // one in `let mut ...`: We completely own `v`, but Rust still asks us to make our intention of + // modifying it explicit. This `mut` is *not* part of the type of `from_vec` - the caller has + // to give up ownership of `v` anyway, so they don't care anymore what you do to it. // // **Exercise 05.1**: Implement this function. // @@ -62,11 +64,11 @@ impl BigInt { } // ## Cloning -//@ If you take a close look at the type of `BigInt::from_vec`, you will notice that it -//@ consumes the vector `v`. The caller hence loses access to its vector. However, there is something -//@ we can do if we don't want that to happen: We can explicitly `clone` the vector, -//@ which means that a full (or *deep*) copy will be performed. Technically, -//@ `clone` takes a borrowed vector in the form of a shared reference, and returns a fully owned one. +//@ If you take a close look at the type of `BigInt::from_vec`, you will notice that it consumes +//@ the vector `v`. The caller hence loses access to its vector. However, there is something we can +//@ do if we don't want that to happen: We can explicitly `clone` the vector, which means that a +//@ full (or *deep*) copy will be performed. Technically, `clone` takes a borrowed vector in the +//@ form of a shared reference, and returns a fully owned one. fn clone_demo() { let v = vec![0,1 << 16]; let b1 = BigInt::from_vec((&v).clone()); @@ -90,9 +92,9 @@ impl Clone for BigInt { //@ These `#[...]` annotations at types (and functions, modules, crates) are called *attributes*. 
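// For illustration (a made-up type, not from the tutorial): an attribute such as
// `derive` asks the compiler to generate an implementation, here of `Clone`, instead
// of writing it by hand as done for `BigInt` above.
#[derive(Clone)]
struct Point {
    x: i32,
    y: i32,
}
// With that, `let q = p.clone();` performs a full copy of a `Point` `p`.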
//@ We will see some more examples of attributes later. -// We can also make the type `SomethingOrNothing` implement `Clone`. -//@ However, that can only work if `T` is `Clone`! So we have to add this bound to `T` when we introduce -//@ the type variable. +// We can also make the type `SomethingOrNothing` implement `Clone`. +//@ However, that can only work if `T` is `Clone`! So we have to add this bound to `T` when we +//@ introduce the type variable. use part02::{SomethingOrNothing,Something,Nothing}; impl Clone for SomethingOrNothing { fn clone(&self) -> Self { @@ -112,21 +114,22 @@ impl Clone for SomethingOrNothing { //@ Again, Rust will generate this implementation automatically if you add //@ `#[derive(Clone)]` right before the definition of `SomethingOrNothing`. -// **Exercise 05.2**: Write some more functions on `BigInt`. What about a function that returns the number of -// digits? The number of non-zero digits? The smallest/largest digit? Of course, these should all take `self` as a shared reference (i.e., in borrowed form). +// **Exercise 05.2**: Write some more functions on `BigInt`. What about a function that returns the +// number of digits? The number of non-zero digits? The smallest/largest digit? Of course, these +// should all take `self` as a shared reference (i.e., in borrowed form). // ## Mutation + aliasing considered harmful (part 2) -//@ Now that we know how to create references to contents of an `enum` (like `v` above), there's another example we can look at for why we -//@ have to rule out mutation in the presence of aliasing. First, we define an `enum` that can hold either -//@ a number, or a string. +//@ Now that we know how to create references to contents of an `enum` (like `v` above), there's +//@ another example we can look at for why we have to rule out mutation in the presence of +//@ aliasing. First, we define an `enum` that can hold either a number, or a string. enum Variant { Number(i32), Text(String), } -//@ Now consider the following piece of code. Like above, `n` will be a reference to a part of `var`, -//@ and since we wrote `ref mut`, the reference will be unique and mutable. In other words, right after the match, `ptr` -//@ points to the number that's stored in `var`, where `var` is a `Number`. Remember that `_` means -//@ "we don't care". +//@ Now consider the following piece of code. Like above, `n` will be a reference to a part of +//@ `var`, and since we wrote `ref mut`, the reference will be unique and mutable. In other words, +//@ right after the match, `ptr` points to the number that's stored in `var`, where `var` is a +//@ `Number`. Remember that `_` means "we don't care". fn work_on_variant(mut var: Variant, text: String) { let mut ptr: &mut i32; match var { @@ -136,15 +139,18 @@ fn work_on_variant(mut var: Variant, text: String) { /* var = Variant::Text(text); */ /* BAD! */ *ptr = 1337; } -//@ Now, imagine what would happen if we were permitted to also mutate `var`. We could, for example, -//@ make it a `Text`. However, `ptr` still points to the old location! Hence `ptr` now points somewhere -//@ into the representation of a `String`. By changing `ptr`, we manipulate the string in completely -//@ unpredictable ways, and anything could happen if we were to use it again! (Technically, the first field -//@ of a `String` is a pointer to its character data, so by overwriting that pointer with an integer, -//@ we make it a completely invalid address. 
When the destructor of `var` runs, it would try to deallocate -//@ that address, and Rust would eat your laundry - or whatever.) +//@ Now, imagine what would happen if we were permitted to also mutate `var`. We could, for +//@ example, make it a `Text`. However, `ptr` still points to the old location! Hence `ptr` now +//@ points somewhere into the representation of a `String`. By changing `ptr`, we manipulate the +//@ string in completely unpredictable ways, and anything could happen if we were to use it again! +//@ (Technically, the first field of a `String` is a pointer to its character data, so by +//@ overwriting that pointer with an integer, we make it a completely invalid address. When the +//@ destructor of `var` runs, it would try to deallocate that address, and Rust would eat your +//@ laundry - or whatever.) //@ -//@ I hope this example clarifies why Rust has to rule out mutation in the presence of aliasing *in general*, -//@ not just for the specific case of a buffer being reallocated, and old pointers becoming hence invalid. +//@ I hope this example clarifies why Rust has to rule out mutation in the presence of aliasing +//@ *in general*, not just for the specific case of a buffer being reallocated, and old pointers +//@ becoming hence invalid. -//@ [index](main.html) | [previous](part04.html) | [raw source](workspace/src/part05.rs) | [next](part06.html) +//@ [index](main.html) | [previous](part04.html) | [raw source](workspace/src/part05.rs) | +//@ [next](part06.html) diff --git a/src/part06.rs b/src/part06.rs index 7113094..21fe644 100644 --- a/src/part06.rs +++ b/src/part06.rs @@ -8,9 +8,10 @@ use part05::BigInt; // that computes the minimum of a list of `BigInt`. First, we have to write `min` for `BigInt`. impl BigInt { fn min_try1(self, other: Self) -> Self { - //@ Just to be sure, we first check that both operands actually satisfy our invariant. `debug_assert!` is a - //@ macro that checks that its argument (must be of type `bool`) is `true`, and panics otherwise. It gets - //@ removed in release builds, which you do with `cargo build --release`. + //@ Just to be sure, we first check that both operands actually satisfy our invariant. + //@ `debug_assert!` is a macro that checks that its argument (must be of type `bool`) is + //@ `true`, and panics otherwise. It gets removed in release builds, which you do with + //@ `cargo build --release`. debug_assert!(self.test_invariant() && other.test_invariant()); // Now our assumption of having no trailing zeros comes in handy: // If the lengths of the two numbers differ, we already know which is larger. @@ -28,7 +29,8 @@ impl BigInt { // Now we can write `vec_min`. fn vec_min(v: &Vec) -> Option { let mut min: Option = None; - // If `v` is a shared reference to a vector, then the default for iterating over it is to call `iter`, the iterator that borrows the elements. + // If `v` is a shared reference to a vector, then the default for iterating over it is to call + // `iter`, the iterator that borrows the elements. for e in v { let e = e.clone(); min = Some(match min { /*@*/ @@ -38,8 +40,8 @@ fn vec_min(v: &Vec) -> Option { } min } -//@ Now, what's happening here? Why do we have to to make a full (deep) copy of `e`, and why did we not -//@ have to do that in our previous version? +//@ Now, what's happening here? Why do we have to to make a full (deep) copy of `e`, and why did we +//@ not have to do that in our previous version? 
//@ //@ The answer is already hidden in the type of `vec_min`: `v` is just borrowed, but //@ the Option that it returns is *owned*. We can't just return one of the elements of `v`, @@ -50,26 +52,28 @@ fn vec_min(v: &Vec) -> Option { //@ underlying data is transferred from `e` to `min`. But that's not allowed, since //@ we just borrowed `e`, so we cannot empty it! We can, however, call `clone` on it. Then we own //@ the copy that was created, and hence we can store it in `min`.
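// A minimal made-up sketch of the point above: moving a value out of borrowed data is
// rejected by the compiler, but cloning it yields an owned copy that we may keep.
fn first_owned(v: &Vec<String>) -> Option<String> {
    if v.len() > 0 {
        // `Some(v[0])` would try to move the `String` out of the borrowed vector,
        // which the compiler rejects; cloning gives us an owned copy instead.
        Some(v[0].clone())
    } else {
        None
    }
}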
-//@ Of course, making such a full copy is expensive, so we'd like to avoid it. We'll come to that in the next part. +//@ Of course, making such a full copy is expensive, so we'd like to avoid it. We'll come to that +//@ in the next part. // ## `Copy` types -//@ But before we go there, I should answer the second question I brought up above: Why did our old `vec_min` work? -//@ We stored the minimal `i32` locally without cloning, and Rust did not complain. That's because there isn't -//@ really much of an "ownership" when it comes to types like `i32` or `bool`: If you move the value from one -//@ place to another, then both instances are "complete". We also say the value has been *duplicated*. This is in -//@ stark contrast to types like `Vec`, where moving the value results in both the old and the new vector to -//@ point to the same underlying buffer. We don't have two vectors, there's no proper duplication. +//@ But before we go there, I should answer the second question I brought up above: Why did our old +//@ `vec_min` work? We stored the minimal `i32` locally without cloning, and Rust did not complain. +//@ That's because there isn't really much of an "ownership" when it comes to types like `i32` or +//@ `bool`: If you move the value from one place to another, then both instances are "complete". We +//@ also say the value has been *duplicated*. This is in stark contrast to types like `Vec`, +//@ where moving the value results in both the old and the new vector to point to the same +//@ underlying buffer. We don't have two vectors, there's no proper duplication. //@ -//@ Rust calls types that can be easily duplicated `Copy` types. `Copy` is another trait, and it is implemented for -//@ types like `i32` and `bool`. Remember how we defined the trait `Minimum` by writing `trait Minimum : Copy { ...`? -//@ This tells Rust that every type that implements `Minimum` must also implement `Copy`, and that's why the compiler -//@ accepted our generic `vec_min` in part 02. `Copy` is the first *marker trait* that we encounter: It does not provide -//@ any methods, but makes a promise about the behavior of the type - in this case, being duplicable. +//@ Rust calls types that can be easily duplicated `Copy` types. `Copy` is another trait, and it is +//@ implemented for types like `i32` and `bool`. Remember how we defined the trait `Minimum` by +//@ writing `trait Minimum : Copy { ...`? This tells Rust that every type that implements `Minimum` +//@ must also implement `Copy`, and that's why the compiler accepted our generic `vec_min` in part +//@ 02. `Copy` is the first *marker trait* that we encounter: It does not provide any methods, but +//@ makes a promise about the behavior of the type - in this case, being duplicable. -//@ If you try to implement `Copy` for `BigInt`, you will notice that Rust -//@ does not let you do that. A type can only be `Copy` if all its elements -//@ are `Copy`, and that's not the case for `BigInt`. However, we can make -//@ `SomethingOrNothing` copy if `T` is `Copy`. +//@ If you try to implement `Copy` for `BigInt`, you will notice that Rust does not let you do +//@ that. A type can only be `Copy` if all its elements are `Copy`, and that's not the case for +//@ `BigInt`. However, we can make `SomethingOrNothing` copy if `T` is `Copy`. use part02::{SomethingOrNothing,Something,Nothing}; impl Copy for SomethingOrNothing {} //@ Again, Rust can generate implementations of `Copy` automatically. 
If @@ -78,27 +82,29 @@ impl Copy for SomethingOrNothing {} //@ ## An operational perspective //@ Instead of looking at what happens "at the surface" (i.e., visible in Rust), one can also explain -//@ ownership passing and how `Copy` and `Clone` fit in by looking at what happens on the machine.
+//@ ownership passing and how `Copy` and `Clone` fit in by looking at what happens on the machine.
+//@
//@ When Rust code is executed, passing a value (like `i32` or `Vec`) to a function will always //@ result in a shallow copy being performed: Rust just copies the bytes representing that value, and //@ considers itself done. That's just like the default copy constructor in C++. Rust, however, will //@ consider this a destructive operation: After copying the bytes elsewhere, the original value must -//@ no longer be used. After all, the two could now share a pointer! If, however, you mark a type `Copy`, -//@ then Rust will *not* consider a move destructive, and just like in C++, the old and new value -//@ can happily coexist. Now, Rust does not allow you to overload the copy constructor. This means that -//@ passing a value around will always be a fast operation, no allocation or any other kind of heap access -//@ will happen. In the situations where you would write a copy constructor in C++ (and hence -//@ incur a hidden cost on every copy of this type), you'd have the type *not* implement `Copy`, but only -//@ `Clone`. This makes the cost explicit. +//@ no longer be used. After all, the two could now share a pointer! If, however, you mark a type +//@ `Copy`, then Rust will *not* consider a move destructive, and just like in C++, the old and new +//@ value can happily coexist. Now, Rust does not allow you to overload the copy constructor. This +//@ means that passing a value around will always be a fast operation, no allocation or any other +//@ kind of heap access will happen. In the situations where you would write a copy constructor in +//@ C++ (and hence incur a hidden cost on every copy of this type), you'd have the type *not* +//@ implement `Copy`, but only `Clone`. This makes the cost explicit. // ## Lifetimes -//@ To fix the performance problems of `vec_min`, we need to avoid using `clone`. We'd like -//@ the return value to not be owned (remember that this was the source of our need for cloning), but *borrowed*. -//@ In other words, we want to return a shared reference to the minimal element. +//@ To fix the performance problems of `vec_min`, we need to avoid using `clone`. We'd like the +//@ return value to not be owned (remember that this was the source of our need for cloning), but +//@ *borrowed*. In other words, we want to return a shared reference to the minimal element. -//@ The function `head` demonstrates how that could work: It returns a reference to the first element of a vector if it is non-empty. -//@ The type of the function says that it will either return nothing, or it will return a borrowed `T`. -//@ We can then obtain a reference to the first element of `v` and use it to construct the return value. +//@ The function `head` demonstrates how that could work: It returns a reference to the first +//@ element of a vector if it is non-empty. The type of the function says that it will either +//@ return nothing, or it will return a borrowed `T`. We can then obtain a reference to the first +//@ element of `v` and use it to construct the return value. fn head(v: &Vec) -> Option<&T> { if v.len() > 0 { Some(&v[0]) /*@*/ @@ -106,8 +112,9 @@ fn head(v: &Vec) -> Option<&T> { None } } -// Technically, we are returning a pointer to the first element. But doesn't that mean that callers have to be -// careful? Imagine `head` would be a C++ function, and we would write the following code. +// Technically, we are returning a pointer to the first element. But doesn't that mean that callers +// have to be careful? 
Imagine `head` would be a C++ function, and we would write the following +// code. /* int foo(std::vector v) { int *first = head(v); @@ -115,37 +122,47 @@ fn head(v: &Vec) -> Option<&T> { return *first; } */ -//@ This is very much like our very first motivating example for ownership, at the beginning of part 04: -//@ `push_back` could reallocate the buffer, making `first` an invalid pointer. Again, we have aliasing (of `first` -//@ and `v`) and mutation. But this time, the bug is hidden behind the call to `head`. How does Rust solve this? If we translate -//@ the code above to Rust, it doesn't compile, so clearly we are good - but how and why? -//@ (Notice that have to explicitly assert using `unwrap` that `first` is not `None`, whereas the C++ code -//@ above would silently dereference a `NULL`-pointer. But that's another point.) +//@ This is very much like our very first motivating example for ownership, at the beginning of +//@ part 04: `push_back` could reallocate the buffer, making `first` an invalid pointer. Again, we +//@ have aliasing (of `first` and `v`) and mutation. But this time, the bug is hidden behind the +//@ call to `head`. How does Rust solve this? If we translate the code above to Rust, it doesn't +//@ compile, so clearly we are good - but how and why? +//@ (Notice that have to explicitly assert using //@ `unwrap` that `first` is not `None`, whereas +//@ the C++ code above would silently dereference a //@ `NULL`-pointer. But that's another point.) fn rust_foo(mut v: Vec) -> i32 { let first: Option<&i32> = head(&v); /* v.push(42); */ *first.unwrap() } -//@ To give the answer to this question, we have to talk about the *lifetime* of a reference. The point is, saying that -//@ you borrowed your friend a `Vec`, or a book, is not good enough, unless you also agree on *how long* -//@ your friend can borrow it. After all, you need to know when you can rely on owning your data (or book) again. +//@ To give the answer to this question, we have to talk about the *lifetime* of a reference. The +//@ point is, saying that you borrowed your friend a `Vec`, or a book, is not good enough, +//@ unless you also agree on *how long* your friend can borrow it. After all, you need to know when +//@ you can rely on owning your data (or book) again. //@ -//@ Every reference in Rust has an associated lifetime, written `&'a T` for a reference with lifetime `'a` to something of type `T`. The full -//@ type of `head` reads as follows: `fn<'a, T>(&'a Vec) -> Option<&'a T>`. Here, `'a` is a *lifetime variable*, which -//@ represents for how long the vector has been borrowed. The function type expresses that argument and return value have *the same lifetime*. +//@ Every reference in Rust has an associated lifetime, written `&'a T` for a reference with +//@ lifetime `'a` to something of type `T`. The full type of `head` reads as follows: `fn<'a, +//@ T>(&'a Vec) -> Option<&'a T>`. Here, `'a` is a *lifetime variable*, which represents for how +//@ long the vector has been borrowed. The function type expresses that argument and return value +//@ have *the same lifetime*. //@ -//@ When analyzing the code of `rust_foo`, Rust has to assign a lifetime to `first`. It will choose the scope -//@ where `first` is valid, which is the entire rest of the function. Because `head` ties the lifetime of its -//@ argument and return value together, this means that `&v` also has to borrow `v` for the entire duration of -//@ the function `rust_foo`. 
So when we try to create a unique reference to `v` for `push`, Rust complains that the two references (the one -//@ for `head`, and the one for `push`) overlap, so neither of them can be unique. Lucky us! Rust caught our mistake and made sure we don't crash the program. +//@ When analyzing the code of `rust_foo`, Rust has to assign a lifetime to `first`. It will choose +//@ the scope where `first` is valid, which is the entire rest of the function. Because `head` ties +//@ the lifetime of its argument and return value together, this means that `&v` also has to borrow +//@ `v` for the entire duration of the function `rust_foo`. So when we try to create a unique +//@ reference to `v` for `push`, Rust complains that the two references (the one for `head`, and +//@ the one for `push`) overlap, so neither of them can be unique. Lucky us! Rust caught our +//@ mistake and made sure we don't crash the program. //@ -//@ So, to sum this up: Lifetimes enable Rust to reason about *how long* a reference is valid, how long ownership has been borrowed. We can thus -//@ safely write functions like `head`, that return references into data they got as argument, and make sure they -//@ are used correctly, *while looking only at the function type*. At no point in our analysis of `rust_foo` did -//@ we have to look *into* `head`. That's, of course, crucial if we want to separate library code from application code. -//@ Most of the time, we don't have to explicitly add lifetimes to function types. This is thanks to *lifetime elision*, -//@ where Rust will automatically insert lifetimes we did not specify, following some [simple, well-documented rules](https://doc.rust-lang.org/stable/book/lifetimes.html#lifetime-elision). +//@ So, to sum this up: Lifetimes enable Rust to reason about *how long* a reference is valid, how +//@ long ownership has been borrowed. We can thus safely write functions like `head`, that return +//@ references into data they got as argument, and make sure they are used correctly, *while +//@ looking only at the function type*. At no point in our analysis of `rust_foo` did we have to +//@ look *into* `head`. That's, of course, crucial if we want to separate library code from +//@ application code. Most of the time, we don't have to explicitly add lifetimes to function +//@ types. This is thanks to *lifetime elision*, where Rust will automatically insert lifetimes we +//@ did not specify, following some simple, well-documented +//@ [rules](https://doc.rust- lang.org/stable/book/lifetimes.html#lifetime-elision). -//@ [index](main.html) | [previous](part05.html) | [raw source](workspace/src/part06.rs) | [next](part07.html) +//@ [index](main.html) | [previous](part05.html) | [raw source](workspace/src/part06.rs) | +//@ [next](part07.html) diff --git a/src/part07.rs b/src/part07.rs index e395ba6..b04d766 100644 --- a/src/part07.rs +++ b/src/part07.rs @@ -29,13 +29,14 @@ pub fn vec_min(v: &Vec) -> Option<&T> { //@ C(++) or Java that can be `NULL`! However, thanks to `Option` being an `enum`, we cannot forget //@ to check the pointer for validity, avoiding the safety issues of C(++).
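// Spelled out with an explicit lifetime (a sketch restating the type discussed in
// part 06, not new code from this patch): the returned reference is valid exactly as
// long as the borrow of the argument.
fn head_explicit<'a, T>(v: &'a Vec<T>) -> Option<&'a T> {
    if v.len() > 0 { Some(&v[0]) } else { None }
}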
//@ Also, if you are worried about wasting space, notice that Rust knows that `&T` can never be -//@ `NULL`, and hence optimizes `Option<&T>` to be no larger than `&T`. The `None` case is represented -//@ as `NULL`. This is another great example of a zero-cost abstraction: `Option<&T>` is exactly like -//@ a pointer in C(++), if you look at what happens during execution - but it's much safer to use. - -// **Exercise 07.1**: For our `vec_min` to be usable with `BigInt`, you will have to provide an implementation of -// `Minimum`. You should be able to pretty much copy the code you wrote for exercise 06.1. You should *not* -// make any copies of `BigInt`! +//@ `NULL`, and hence optimizes `Option<&T>` to be no larger than `&T`. The `None` case is +//@ represented as `NULL`. This is another great example of a zero-cost abstraction: `Option<&T>` +//@ is exactly like a pointer in C(++), if you look at what happens during execution - but it's +//@ much safer to use. + +// **Exercise 07.1**: For our `vec_min` to be usable with `BigInt`, you will have to provide an +// implementation of `Minimum`. You should be able to pretty much copy the code you wrote for +// exercise 06.1. You should *not* make any copies of `BigInt`! impl Minimum for BigInt { fn min<'a>(&'a self, other: &'a Self) -> &'a Self { unimplemented!() @@ -44,13 +45,15 @@ impl Minimum for BigInt { // ## Operator Overloading //@ How can we know that our `min` function actually does what we want it to do? One possibility -//@ here is to do *testing*. Rust comes with nice built-in support for both unit tests and integration -//@ tests. However, before we go there, we need to have a way of checking whether the results of function calls are -//@ correct. In other words, we need to define how to test equality of `BigInt`. Being able to -//@ test equality is a property of a type, that - you guessed it - Rust expresses as a trait: `PartialEq`. - -//@ Doing this for `BigInt` is fairly easy, thanks to our requirement that there be no trailing zeros. We simply -//@ re-use the equality test on vectors, which compares all the elements individually. +//@ here is to do *testing*. Rust comes with nice built-in support for both unit tests and +//@ integration tests. However, before we go there, we need to have a way of checking whether the +//@ results of function calls are correct. In other words, we need to define how to test equality +//@ of `BigInt`. Being able to test equality is a property of a type, that - you guessed it - Rust +//@ expresses as a trait: `PartialEq`. + +//@ Doing this for `BigInt` is fairly easy, thanks to our requirement that there be no trailing +//@ zeros. We simply re-use the equality test on vectors, which compares all the elements +//@ individually. //@ The `inline` attribute tells Rust that we will typically want this function to be inlined. impl PartialEq for BigInt { #[inline] @@ -62,28 +65,34 @@ impl PartialEq for BigInt { //@ Since implementing `PartialEq` is a fairly mechanical business, you can let Rust automate this //@ by adding the attribute `derive(PartialEq)` to the type definition. In case you wonder about -//@ the "partial", I suggest you check out the documentation of [`PartialEq`](https://doc.rust-lang.org/std/cmp/trait.PartialEq.html) -//@ and [`Eq`](https://doc.rust-lang.org/std/cmp/trait.Eq.html). `Eq` can be automatically derived as well. - -// Now we can compare `BigInt`s. Rust treats `PartialEq` special in that it is wired to the operator `==`: -//@ That operator can now be used on our numbers! 
Speaking in C++ terms, we just overloaded the `==` operator -//@ for `BigInt`. Rust does not have function overloading (i.e., it will not dispatch to different -//@ functions depending on the type of the argument). Instead, one typically finds (or defines) a -//@ trait that catches the core characteristic common to all the overloads, and writes a single -//@ function that's generic in the trait. For example, instead of overloading a function for all -//@ the ways a string can be represented, one writes a generic functions over [ToString](https://doc.rust-lang.org/std/string/trait.ToString.html). +//@ the "partial", I suggest you check out the documentation of +//@ [`PartialEq`](https://doc.rust-lang.org/std/cmp/trait.PartialEq.html) and +//@ [`Eq`](https://doc.rust-lang.org/std/cmp/trait.Eq.html). `Eq` can be automatically derived as +//@ well. + +// Now we can compare `BigInt`s. Rust treats `PartialEq` special in that it is wired to the operator +// `==`: +//@ That operator can now be used on our numbers! Speaking in C++ terms, we just overloaded the +//@ `==` operator for `BigInt`. Rust does not have function overloading (i.e., it will not dispatch +//@ to different functions depending on the type of the argument). Instead, one typically finds (or +//@ defines) a trait that catches the core characteristic common to all the overloads, and writes a +//@ single function that's generic in the trait. For example, instead of overloading a function for +//@ all the ways a string can be represented, one writes a generic functions over +//@ [ToString](https://doc.rust-lang.org/std/string/trait.ToString.html). //@ Usually, there is a trait like this that fits the purpose - and if there is, this has the great //@ advantage that any type *you* write, that can convert to a string, just has to implement -//@ that trait to be immediately usable with all the functions out there that generalize over `ToString`. +//@ that trait to be immediately usable with all the functions out there that generalize over +//@ `ToString`. //@ Compare that to C++ or Java, where the only chance to add a new overloading variant is to //@ edit the class of the receiver. //@ -//@ Why can we also use `!=`, even though we just overloaded `==`? The answer lies in what's called a *default implementation*. -//@ If you check out the documentation of `PartialEq` I linked above, you will see that the trait actually provides -//@ two methods: `eq` to test equality, and `ne` to test inequality. As you may have guessed, `!=` is wired to `ne`. -//@ The trait *definition* also provides a default implementation of `ne` to be the negation of `eq`. Hence you can just -//@ provide `eq`, and `!=` will work fine. Or, if you have a more efficient way of deciding inequality, you can provide -//@ `ne` for your type yourself. +//@ Why can we also use `!=`, even though we just overloaded `==`? The answer lies in what's called +//@ a *default implementation*. If you check out the documentation of `PartialEq` I linked above, +//@ you will see that the trait actually provides two methods: `eq` to test equality, and `ne` to +//@ test inequality. As you may have guessed, `!=` is wired to `ne`. The trait *definition* also +//@ provides a default implementation of `ne` to be the negation of `eq`. Hence you can just +//@ provide `eq`, and `!=` will work fine. Or, if you have a more efficient way of deciding +//@ inequality, you can provide `ne` for your type yourself. 
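The split between `eq` and the defaulted `ne` is easy to see on a small made-up type (nothing from this course): providing `eq` alone is enough to get both `==` and `!=`.

struct Flags { bits: u32 }

impl PartialEq for Flags {
    // Only `eq` is provided; the trait's default implementation of `ne` is `!self.eq(other)`.
    fn eq(&self, other: &Self) -> bool {
        self.bits == other.bits
    }
}

fn main() {
    let a = Flags { bits: 0b1010 };
    let b = Flags { bits: 0b0101 };
    let c = Flags { bits: 0b1010 };
    assert!(a != b); // `!=` works via the default `ne`
    assert!(a == c); // `==` dispatches to our `eq`
    println!("all comparisons passed");
}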
fn compare_big_ints() { let b1 = BigInt::new(13); let b2 = BigInt::new(37); @@ -92,8 +101,9 @@ fn compare_big_ints() { // ## Testing // With our equality test written, we are now ready to write our first testcase. -//@ It doesn't get much simpler: You just write a function (with no arguments or return value), and give it -// the `test` attribute. `assert!` is like `debug_assert!`, but does not get compiled away in a release build. +//@ It doesn't get much simpler: You just write a function (with no arguments or return value), +//@ and give it the `test` attribute. `assert!` is like `debug_assert!`, but does not get compiled +//@ away in a release build. #[test] fn test_min() { let b1 = BigInt::new(1); @@ -106,15 +116,16 @@ fn test_min() { // Now run `cargo test` to execute the test. If you implemented `min` correctly, it should all work! // ## Formatting -//@ There is also a macro `assert_eq!` that's specialized to test for equality, and that prints the two -//@ values (left and right) if they differ. To be able to do that, the macro needs to know how to format -//@ the value for printing. This means that we - guess what? - have to implement an appropriate trait. -//@ Rust knows about two ways of formatting a value: `Display` is for pretty-printing something in a way -//@ that users can understand, while `Debug` is meant to show the internal state of data and targeted at -//@ the programmer. The latter is what we want for `assert_eq!`, so let's get started. - -// All formating is handled by [`std::fmt`](https://doc.rust-lang.org/std/fmt/index.html). I won't explain -// all the details, and refer you to the documentation instead. +//@ There is also a macro `assert_eq!` that's specialized to test for equality, and that prints the +//@ two values (left and right) if they differ. To be able to do that, the macro needs to know how +//@ to format the value for printing. This means that we - guess what? - have to implement an +//@ appropriate trait. Rust knows about two ways of formatting a value: `Display` is for pretty- +//@ printing something in a way that users can understand, while `Debug` is meant to show the +//@ internal state of data and targeted at the programmer. The latter is what we want for +//@ `assert_eq!`, so let's get started. + +// All formating is handled by [`std::fmt`](https://doc.rust-lang.org/std/fmt/index.html). I won't +// explain all the details, and refer you to the documentation instead. use std::fmt; //@ In the case of `BigInt`, we'd like to just output our internal `data` array, so we @@ -142,9 +153,11 @@ fn test_vec_min() { // **Exercise 07.1**: Add some more testcases. In particular, make sure you test the behavior of // `vec_min` on an empty vector. Also add tests for `BigInt::from_vec` (in particular, removing // trailing zeros). Finally, break one of your functions in a subtle way and watch the test fail. -// -// **Exercise 07.2**: Go back to your good ol' `SomethingOrNothing`, and implement `Display` for it. (This will, -// of course, need a `Display` bound on `T`.) Then you should be able to use them with `println!` just like you do -// with numbers, and get rid of the inherent functions to print `SomethingOrNothing` and `SomethingOrNothing`. -//@ [index](main.html) | [previous](part06.html) | [raw source](workspace/src/part07.rs) | [next](part08.html) +// **Exercise 07.2**: Go back to your good ol' `SomethingOrNothing`, and implement `Display` for it. +// (This will, of course, need a `Display` bound on `T`.) 
Then you should be able to use them with +// `println!` just like you do with numbers, and get rid of the inherent functions to print +// `SomethingOrNothing` and `SomethingOrNothing`. + +//@ [index](main.html) | [previous](part06.html) | [raw source](workspace/src/part07.rs) | +//@ [next](part08.html) diff --git a/src/part08.rs b/src/part08.rs index c18cbeb..17beefa 100644 --- a/src/part08.rs +++ b/src/part08.rs @@ -4,25 +4,31 @@ use std::{cmp,ops}; use part05::BigInt; -//@ As our next goal, let us implement addition for our `BigInt`. The main issue here will be dealing with the overflow. -//@ First of all, we will have to detect when an overflow happens. This is stored in a so-called *carry* bit, and we have to carry this -//@ information on to the next pair of digits we add. The core primitive of addition therefore is to add two digits *and* a -//@ carry, and to return the sum digit and the next carry. - -// So, let us write a function to "add with carry", and give it the appropriate type. Notice Rust's native support for pairs. +//@ As our next goal, let us implement addition for our `BigInt`. The main issue here will be +//@ dealing with the overflow. First of all, we will have to detect when an overflow happens. This +//@ is stored in a so-called *carry* bit, and we have to carry this information on to the next pair +//@ of digits we add. The core primitive of addition therefore is to add two digits *and* a carry, +//@ and to return the sum digit and the next carry. + +// So, let us write a function to "add with carry", and give it the appropriate type. Notice Rust's +// native support for pairs. fn overflowing_add(a: u64, b: u64, carry: bool) -> (u64, bool) { - //@ Rust's stanza on integer overflows may be a bit surprising: In general, when we write `a + b`, an overflow is - //@ considered an *error*. If you compile your program in debug mode, Rust will actually check for that error and panic - //@ the program in case of overflows. For performance reasons, no such checks are currently inserted for release builds. - //@ The reason for this is that many serious security vulnerabilities have been caused by integer overflows, so just assuming - //@ "per default" that they are intended is dangerous.
- //@ If you explicitly *do* want an overflow to happen, you can call the `wrapping_add` - //@ function (see [the documentation](https://doc.rust-lang.org/stable/std/primitive.u64.html#method.wrapping_add), - //@ there are similar functions for other arithmetic operations). There are also similar functions - //@ `checked_add` etc. to enforce the overflow check. + //@ Rust's stanza on integer overflows may be a bit surprising: In general, when we write `a + + //@ b`, an overflow is considered an *error*. If you compile your program in debug mode, Rust + //@ will actually check for that error and panic the program in case of overflows. For + //@ performance reasons, no such checks are currently inserted for release builds. + //@ The reason for this is that many serious security vulnerabilities have been caused by + //@ integer overflows, so just assuming "per default" that they are intended is dangerous. + //@
+ //@ If you explicitly *do* want an overflow to happen, you can call the `wrapping_add` function + //@ (see the + //@ [documentation](https://doc.rust-lang.org/stable/std/primitive.u64.html#method.wrapping_add), + //@ there are similar functions for other arithmetic operations). There are also similar + //@ functions `checked_add` etc. to enforce the overflow check. let sum = a.wrapping_add(b); - // If an overflow happened, then the sum will be smaller than *both* summands. Without an overflow, of course, it will be - // at least as large as both of them. So, let's just pick one and check. + // If an overflow happened, then the sum will be smaller than *both* summands. Without an + // overflow, of course, it will be at least as large as both of them. So, let's just pick one + // and check. if sum >= a { // The addition did not overflow.
// **Exercise 08.1**: Write the code to handle adding the carry in this case. @@ -48,21 +54,27 @@ fn test_overflowing_add() { } // ## Associated Types -//@ Now we are equipped to write the addition function for `BigInt`. As you may have guessed, the `+` operator -//@ is tied to a trait (`std::ops::Add`), which we are going to implement for `BigInt`. +//@ Now we are equipped to write the addition function for `BigInt`. As you may have guessed, the +//@ `+` operator is tied to a trait (`std::ops::Add`), which we are going to implement for +//@ `BigInt`. //@ -//@ In general, addition need not be homogeneous: You could add things of different types, like vectors and points. So when implementing -//@ `Add` for a type, one has to specify the type of the other operand. In this case, it will also be `BigInt` (and we could have left it -//@ away, since that's the default). +//@ In general, addition need not be homogeneous: You could add things of different types, like +//@ vectors and points. So when implementing `Add` for a type, one has to specify the type of the +//@ other operand. In this case, it will also be `BigInt` (and we could have left it away, since +//@ that's the default). impl ops::Add for BigInt { - //@ Besides static functions and methods, traits can contain *associated types*: This is a type chosen by every particular implementation - //@ of the trait. The methods of the trait can then refer to that type. In the case of addition, it is used to give the type of the result. - //@ (Also see the [documentation of `Add`](https://doc.rust-lang.org/stable/std/ops/trait.Add.html).) + //@ Besides static functions and methods, traits can contain *associated types*: This is a type + //@ chosen by every particular implementation of the trait. The methods of the trait can then + //@ refer to that type. In the case of addition, it is used to give the type of the result. + //@ (Also see the + //@[documentation of `Add`](https://doc.rust-lang.org/stable/std/ops/trait.Add.html).) //@ - //@ In general, you can consider the two `BigInt` given above (in the `impl` line) *input* types of trait search: When - //@ `a + b` is invoked with `a` having type `T` and `b` having type `U`, Rust tries to find an implementation of `Add` for - //@ `T` where the right-hand type is `U`. The associated types, on the other hand, are *output* types: For every combination - //@ of input types, there's a particular result type chosen by the corresponding implementation of `Add`. + //@ In general, you can consider the two `BigInt` given above (in the `impl` line) *input* + //@ types of trait search: When `a + b` is invoked with `a` having type `T` and `b` having type + //@ `U`, Rust tries to find an implementation of `Add` for `T` where the right-hand type is + //@ `U`. The associated types, on the other hand, are *output* types: For every combination of + //@ input types, there's a particular result type chosen by the corresponding implementation of + //@ `Add`. // Here, we choose the result type to be again `BigInt`. type Output = BigInt; @@ -77,8 +89,10 @@ impl ops::Add for BigInt { for i in 0..max_len { let lhs_val = if i < self.data.len() { self.data[i] } else { 0 }; let rhs_val = if i < rhs.data.len() { rhs.data[i] } else { 0 }; - // Compute next digit and carry. Then, store the digit for the result, and the carry for later. - //@ Notice how we can obtain names for the two components of the pair that `overflowing_add` returns. + // Compute next digit and carry. 
Then, store the digit for the result, and the carry + // for later. + //@ Notice how we can obtain names for the two components of the pair that + //@ `overflowing_add` returns. let (sum, new_carry) = overflowing_add(lhs_val, rhs_val, carry); /*@*/ result_vec.push(sum); /*@*/ carry = new_carry; /*@*/ @@ -92,12 +106,13 @@ impl ops::Add for BigInt { } // ## Traits and reference types -//@ If you inspect the addition function above closely, you will notice that it actually consumes ownership of both operands -//@ to produce the result. This is, of course, in general not what we want. We'd rather like to be able to add two `&BigInt`. +//@ If you inspect the addition function above closely, you will notice that it actually consumes +//@ ownership of both operands to produce the result. This is, of course, in general not what we +//@ want. We'd rather like to be able to add two `&BigInt`. -// Writing this out becomes a bit tedious, because trait implementations (unlike functions) require full explicit annotation -// of lifetimes. Make sure you understand exactly what the following definition says. Notice that we can implement a trait for -// a reference type! +// Writing this out becomes a bit tedious, because trait implementations (unlike functions) require +// full explicit annotation of lifetimes. Make sure you understand exactly what the following +// definition says. Notice that we can implement a trait for a reference type! impl<'a, 'b> ops::Add<&'a BigInt> for &'b BigInt { type Output = BigInt; fn add(self, rhs: &'a BigInt) -> Self::Output { @@ -106,16 +121,19 @@ impl<'a, 'b> ops::Add<&'a BigInt> for &'b BigInt { } } -// **Exercise 08.4**: Implement the two missing combinations of arguments for `Add`. You should not have to duplicate the implementation. +// **Exercise 08.4**: Implement the two missing combinations of arguments for `Add`. You should not +// have to duplicate the implementation. // ## Modules -//@ As you learned, tests can be written right in the middle of your development in Rust. However, it is -//@ considered good style to bundle all tests together. This is particularly useful in cases where -//@ you wrote utility functions for the tests, that no other code should use. - -// Rust calls a bunch of definitions that are grouped together a *module*. You can put the tests in a submodule as follows. -//@ The `cfg` attribute controls whether this module is even compiled: If we added some functions that are useful for testing, -//@ Rust would not bother compiling them when you just build your program for normal use. Other than that, tests work as usually. +//@ As you learned, tests can be written right in the middle of your development in Rust. However, +//@ it is considered good style to bundle all tests together. This is particularly useful in cases +//@ where you wrote utility functions for the tests, that no other code should use. + +// Rust calls a bunch of definitions that are grouped together a *module*. You can put the tests in +// a submodule as follows. +//@ The `cfg` attribute controls whether this module is even compiled: If we added some functions +//@ that are useful for testing, Rust would not bother compiling them when you just build your +//@ program for normal use. Other than that, tests work as usually. #[cfg(test)] mod tests { use part05::BigInt; @@ -129,27 +147,36 @@ mod tests { // **Exercise 08.5**: Add some more cases to this test. } } -//@ As already mentioned, outside of the module, only those items declared public with `pub` may be used. 
Submodules can access -//@ everything defined in their parents. Modules themselves are also hidden from the outside per default, and can be made public -//@ with `pub`. When you use an identifier (or, more general, a *path* like `mod1::submod::name`), it is interpreted as being -//@ relative to the current module. So, for example, to access `overflowing_add` from within `my_mod`, you would have to give a more -//@ explicit path by writing `super::overflowing_add`, which tells Rust to look in the parent module. +//@ As already mentioned, outside of the module, only those items declared public with `pub` may be +//@ used. Submodules can access everything defined in their parents. Modules themselves are also +//@ hidden from the outside per default, and can be made public with `pub`. When you use an +//@ identifier (or, more general, a *path* like `mod1::submod::name`), it is interpreted as being +//@ relative to the current module. So, for example, to access `overflowing_add` from within +//@ `my_mod`, you would have to give a more explicit path by writing `super::overflowing_add`, +//@ which tells Rust to look in the parent module. //@ -//@ You can make names from other modules available locally with `use`. Per default, `use` works globally, so e.g. -//@ `use std;` imports the *global* name `std`. By adding `super::` or `self::` to the beginning of the path, you make it relative -//@ to the parent or current module, respectively. (You can also explicitly construct an absolute path by starting it with `::`, -//@ e.g., `::std::cmp::min`). You can say `pub use path;` to simultaneously *import* names and make them publicly available to others. -//@ Finally, you can import all public items of a module at once with `use module::*;`. +//@ You can make names from other modules available locally with `use`. Per default, `use` works +//@ globally, so e.g. `use std;` imports the *global* name `std`. By adding `super::` or `self::` +//@ to the beginning of the path, you make it relative to the parent or current module, +//@ respectively. (You can also explicitly construct an absolute path by starting it with `::`, +//@ e.g., `::std::cmp::min`). You can say `pub use path;` to simultaneously *import* names and make +//@ them publicly available to others. Finally, you can import all public items of a module at once +//@ with `use module::*;`. //@ -//@ Modules can be put into separate files with the syntax `mod name;`. To explain this, let me take a small detour through -//@ the Rust compilation process. Cargo starts by invoking`rustc` on the file `src/lib.rs` or `src/main.rs`, depending on whether -//@ you compile an application or a library. When `rustc` encounters a `mod name;`, it looks for the files `name.rs` and -//@ `name/mod.rs` and goes on compiling there. (It is an error for both of them to exist.) You can think of the contents of the -//@ file being embedded at this place. However, only the file where compilation started, and files `name/mod.rs` can load modules -//@ from other files. This ensures that the directory structure mirrors the structure of the modules, with `mod.rs`, `lib.rs` -//@ and `main.rs` representing a directory or crate itself (similar to, e.g., `__init__.py` in Python). - -// **Exercise 08.6**: Write a subtraction function, and testcases for it. Decide for yourself how you want to handle negative results. -// For example, you may want to return an `Option`, to panic, or to return `0`. 
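The visibility and path rules sketched above are perhaps easiest to see in a tiny self-contained example (the `math` module and its functions are made up for illustration): a private item is reachable from a child module via `super::`, but not from outside.

mod math {
    // Private helper: visible here and in child modules, but not outside `math`.
    fn double(x: i32) -> i32 { x * 2 }

    // Public: reachable from the outside as `math::quadruple`.
    pub fn quadruple(x: i32) -> i32 { double(double(x)) }

    #[cfg(test)]
    mod tests {
        // `super::` refers to the parent module `math`, so even its private
        // `double` is accessible from this test submodule.
        use super::{double, quadruple};

        #[test]
        fn it_works() {
            assert_eq!(double(2), 4);
            assert_eq!(quadruple(2), 8);
        }
    }
}

fn main() {
    println!("{}", math::quadruple(3)); // prints 12
    // `math::double(3)` would not compile here: `double` is private to `math`.
}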
- -//@ [index](main.html) | [previous](part07.html) | [raw source](workspace/src/part08.rs) | [next](part09.html) +//@ Modules can be put into separate files with the syntax `mod name;`. To explain this, let me +//@ take a small detour through the Rust compilation process. Cargo starts by invoking`rustc` on +//@ the file `src/lib.rs` or `src/main.rs`, depending on whether you compile an application or a +//@ library. When `rustc` encounters a `mod name;`, it looks for the files `name.rs` and +//@ `name/mod.rs` and goes on compiling there. (It is an error for both of them to exist.) +//@ You can think of the contents of the file being embedded at this place. However, only the file +//@ where compilation started, and files `name/mod.rs` can load modules from other files. This +//@ ensures that the directory structure mirrors the structure of the modules, with `mod.rs`, +//@ `lib.rs` and `main.rs` representing a directory or crate itself (similar to, e.g., +//@ `__init__.py` in Python). + +// **Exercise 08.6**: Write a subtraction function, and testcases for it. Decide for yourself how +// you want to handle negative results. For example, you may want to return an `Option`, to panic, +// or to return `0`. + +//@ [index](main.html) | [previous](part07.html) | [raw source](workspace/src/part08.rs) | +//@ [next](part09.html) diff --git a/src/part09.rs b/src/part09.rs index 0ca4a43..9cad15d 100644 --- a/src/part09.rs +++ b/src/part09.rs @@ -3,24 +3,30 @@ use part05::BigInt; -//@ In the following, we will look into the iterator mechanism of Rust and make our `BigInt` compatible -//@ with the `for` loops. Of course, this is all about implementing certain traits again. In particular, -//@ an iterator is something that implements the `Iterator` trait. As you can see in [the documentation](https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html), -//@ this trait mandates a single function `next` returning an `Option`, where `Item` is an -//@ associated type chosen by the implementation. (There are many more methods provided for `Iterator`, -//@ but they all have default implementations, so we don't have to worry about them right now.) +//@ In the following, we will look into the iterator mechanism of Rust and make our `BigInt` +//@ compatible with the `for` loops. Of course, this is all about implementing certain traits +//@ again. In particular, an iterator is something that implements the `Iterator` trait. As you can +//@ see in the [documentation](https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html), this +//@ trait mandates a single function `next` returning an `Option`, where `Item` is an +//@ associated type chosen by the implementation. (There are many more methods provided for +//@ `Iterator`, but they all have default implementations, so we don't have to worry about them +//@ right now.) //@ -//@ For the case of `BigInt`, we want our iterator to iterate over the digits in normal, notational order: The most-significant -//@ digit comes first. So, we have to write down some type, and implement `Iterator` for it such that `next` returns the digits -//@ one-by-one. Clearly, the iterator must somehow be able to access the number it iterates over, and it must store its current -//@ location. However, it cannot *own* the `BigInt`, because then the number would be gone after iteration! That'd certainly be bad. -//@ The only alternative is for the iterator to *borrow* the number, so it takes a reference. 
+//@ For the case of `BigInt`, we want our iterator to iterate over the digits in normal, notational +//@ order: The most-significant digit comes first. So, we have to write down some type, and +//@ implement `Iterator` for it such that `next` returns the digits one-by-one. Clearly, the +//@ iterator must somehow be able to access the number it iterates over, and it must store its +//@ current location. However, it cannot *own* the `BigInt`, because then the number would be gone +//@ after iteration! That'd certainly be bad. The only alternative is for the iterator to *borrow* +//@ the number, so it takes a reference. -//@ In writing this down, we again have to be explicit about the lifetime of the reference: We can't just have an -//@ `Iter`, we must have an `Iter<'a>` that borrows the number for lifetime `'a`. This is our first example of -//@ a data-type that's polymorphic in a lifetime, as opposed to a type.
-//@ `usize` here is the type of unsigned, pointer-sized numbers. It is typically the type of "lengths of things", -//@ in particular, it is the type of the length of a `Vec` and hence the right type to store an offset into the vector of digits. +//@ In writing this down, we again have to be explicit about the lifetime of the reference: We +//@ can't just have an `Iter`, we must have an `Iter<'a>` that borrows the number for lifetime +//@ `'a`. This is our first example of a data-type that's polymorphic in a lifetime, as opposed to +//@ a type.
+//@ `usize` here is the type of unsigned, pointer-sized numbers. It is typically the type of +//@ "lengths of things", in particular, it is the type of the length of a `Vec` and hence the right +//@ type to store an offset into the vector of digits. pub struct Iter<'a> { num: &'a BigInt, idx: usize, // the index of the last number that was returned @@ -46,9 +52,10 @@ impl<'a> Iterator for Iter<'a> { // All we need now is a function that creates such an iterator for a given `BigInt`. impl BigInt { - //@ Notice that when we write the type of `iter`, we don't actually have to give the lifetime parameter of `Iter`. Just as it is - //@ the case with functions returning references, you can elide the lifetime. The rules for adding the lifetimes are exactly the - //@ same. (See the last section of [part 06](part06.html).) + //@ Notice that when we write the type of `iter`, we don't actually have to give the lifetime + //@ parameter of `Iter`. Just as it is the case with functions returning references, you can + //@ elide the lifetime. The rules for adding the lifetimes are exactly the same. (See the last + //@ section of [part 06](part06.html).) fn iter(&self) -> Iter { Iter { num: self, idx: self.data.len() } /*@*/ } @@ -65,10 +72,11 @@ pub fn main() { // Of course, we don't have to use `for` to apply the iterator. We can also explicitly call `next`. fn print_digits_v1(b: &BigInt) { let mut iter = b.iter(); - //@ `loop` is the keyword for a loop without a condition: It runs endlessly, or until you break out of - //@ it with `break` or `return`. + //@ `loop` is the keyword for a loop without a condition: It runs endlessly, or until you break + //@ out of it with `break` or `return`. loop { - // Each time we go through the loop, we analyze the next element presented by the iterator - until it stops. + // Each time we go through the loop, we analyze the next element presented by the iterator + // - until it stops. match iter.next() { /*@*/ None => break, /*@*/ Some(digit) => println!("{}", digit) /*@*/ @@ -76,12 +84,13 @@ fn print_digits_v1(b: &BigInt) { } } -//@ Now, it turns out that this combination of doing a loop and a pattern matching is fairly common, and Rust -//@ provides some convenient syntactic sugar for it. +//@ Now, it turns out that this combination of doing a loop and a pattern matching is fairly +//@ common, and Rust provides some convenient syntactic sugar for it. fn print_digits_v2(b: &BigInt) { let mut iter = b.iter(); - //@ `while let` performs the given pattern matching on every round of the loop, and cancels the loop if the pattern - //@ doesn't match. There's also `if let`, which works similar, but of course without the loopy part. + //@ `while let` performs the given pattern matching on every round of the loop, and cancels the + //@ loop if the pattern doesn't match. There's also `if let`, which works similar, but of + //@ course without the loopy part. while let Some(digit) = iter.next() { println!("{}", digit) } @@ -89,14 +98,15 @@ fn print_digits_v2(b: &BigInt) { // **Exercise 09.1**: Write a testcase for the iterator, making sure it yields the corrects numbers. // -// **Exercise 09.2**: Write a function `iter_ldf` that iterates over the digits with the least-significant -// digits coming first. Write a testcase for it. +// **Exercise 09.2**: Write a function `iter_ldf` that iterates over the digits with the +// least-significant digits coming first. Write a testcase for it. 
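The `Iter<'a>` pattern is not specific to `BigInt`. Here is a minimal self-contained sketch of the same shape over a plain borrowed slice (the `RevDigits` type and the `rev_iter` helper are made up); note how the lifetime on the return type of `rev_iter` can be elided, just as described above for `iter`.

struct RevDigits<'a> {
    data: &'a [u64],
    idx: usize, // how many elements are still left to yield
}

impl<'a> Iterator for RevDigits<'a> {
    type Item = u64;
    fn next(&mut self) -> Option<u64> {
        if self.idx == 0 {
            None
        } else {
            // Step the index first, then hand out the element it now points to.
            self.idx -= 1;
            Some(self.data[self.idx])
        }
    }
}

fn rev_iter(data: &[u64]) -> RevDigits {
    RevDigits { data: data, idx: data.len() }
}

fn main() {
    let v = vec![1, 2, 3];
    for x in rev_iter(&v) {
        println!("{}", x); // prints 3, then 2, then 1
    }
}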
// ## Iterator invalidation and lifetimes -//@ You may have been surprised that we had to explicitly annotate a lifetime when we wrote `Iter`. Of -//@ course, with lifetimes being present at every reference in Rust, this is only consistent. But do we at -//@ least gain something from this extra annotation burden? (Thankfully, this burden only occurs when we -//@ define *types*, and not when we define functions - which is typically much more common.) +//@ You may have been surprised that we had to explicitly annotate a lifetime when we wrote `Iter`. +//@ Of course, with lifetimes being present at every reference in Rust, this is only consistent. +//@ But do we at least gain something from this extra annotation burden? (Thankfully, this burden +//@ only occurs when we define *types*, and not when we define functions - which is typically much +//@ more common.) //@ It turns out that the answer to this question is yes! This particular aspect of the concept of //@ lifetimes helps Rust to eliminate the issue of *iterator invalidation*. Consider the following @@ -108,29 +118,37 @@ fn iter_invalidation_demo() { /*b = b + BigInt::new(1);*/ /* BAD! */ } } -//@ If you enable the bad line, Rust will reject the code. Why? The problem is that we are modifying the -//@ number while iterating over it. In other languages, this can have all sorts of effects from inconsistent -//@ data or throwing an exception (Java) to bad pointers being dereferenced (C++). Rust, however, is able to -//@ detect this situation. When you call `iter`, you have to borrow `b` for some lifetime `'a`, and you obtain -//@ `Iter<'a>`. This is an iterator that's only valid for lifetime `'a`. Gladly, we have this annotation available -//@ to make such a statement. Rust enforces that `'a` spans every call to `next`, which means it has to span the loop. -//@ Thus `b` is borrowed for the duration of the loop, and we cannot mutate it. This is yet another example for -//@ how the combination of mutation and aliasing leads to undesired effects (not necessarily crashes, think of Java), -//@ which Rust successfully prevents. +//@ If you enable the bad line, Rust will reject the code. Why? The problem is that we are +//@ modifying the number while iterating over it. In other languages, this can have all sorts of +//@ effects from inconsistent data or throwing an exception (Java) to bad pointers being +//@ dereferenced (C++). Rust, however, is able to detect this situation. +//@ When you call `iter`, you have to borrow `b` for some lifetime `'a`, and you obtain `Iter<'a>`. +//@ This is an iterator that's only valid for lifetime `'a`. Gladly, we have this annotation +//@ available to make such a statement. Rust enforces that `'a` spans every call to `next`, which +//@ means it has to span the loop. +//@ Thus `b` is borrowed for the duration of the loop, and we cannot mutate it. This is yet another +//@ example for how the combination of mutation and aliasing leads to undesired effects (not +//@ necessarily crashes, think of Java), which Rust successfully prevents. // ## Iterator conversion trait -//@ If you closely compare the `for` loop in `main` above, with the one in `part06::vec_min`, you will notice that we were able to write -//@ `for e in v` earlier, but now we have to call `iter`. Why is that? Well, the `for` sugar is not actually tied to `Iterator`. -//@ Instead, it demands an implementation of [`IntoIterator`](https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html). 
-//@ That's a trait of types that provide a *conversion* function into some kind of iterator. These conversion traits are a frequent -//@ pattern in Rust: Rather than demanding that something is an iterator, or a string, or whatever; one demands that something -//@ can be converted to an iterator/string/whatever. This provides convenience similar to overloading of functions: The function -//@ can be called with lots of different types. By implementing such traits for your types, you can even make your own types -//@ work smoothly with existing library functions. As usually for Rust, this abstraction comes at zero cost: If your data is already -//@ of the right type, the conversion function will not do anything and trivially be optimized away. +//@ If you closely compare the `for` loop in `main` above, with the one in `part06::vec_min`, you +//@ will notice that we were able to write `for e in v` earlier, but now we have to call `iter`. +//@ Why is that? Well, the `for` sugar is not actually tied to `Iterator`. Instead, it demands an +//@ implementation of +//@ [`IntoIterator`](https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html). +//@ That's a trait of types that provide a *conversion* function into some kind of iterator. These +//@ conversion traits are a frequent pattern in Rust: Rather than demanding that something is an +//@ iterator, or a string, or whatever; one demands that something can be converted to an +//@ iterator/string/whatever. This provides convenience similar to overloading of functions: The +//@ function can be called with lots of different types. +//@ By implementing such traits for your types, you can even make your own types work smoothly with +//@ existing library functions. As usually for Rust, this abstraction comes at zero cost: If your +//@ data is already of the right type, the conversion function will not do anything and trivially +//@ be optimized away. -//@ If you have a look at the documentation of `IntoIterator`, you will notice that the function `into_iter` it provides actually -//@ consumes its argument. So we implement the trait for *references to* numbers, such that the number is not lost after the iteration. +//@ If you have a look at the documentation of `IntoIterator`, you will notice that the function +//@ `into_iter` it provides actually consumes its argument. So we implement the trait for +//@ *references to* numbers, such that the number is not lost after the iteration. impl<'a> IntoIterator for &'a BigInt { type Item = u64; type IntoIter = Iter<'a>; @@ -139,11 +157,16 @@ impl<'a> IntoIterator for &'a BigInt { } } // With this in place, you can now replace `b.iter()` in `main` by `&b`. Go ahead and try it!
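The same two spellings already exist for `Vec`, which makes for a small self-contained comparison (plain `i32`s, nothing from the course):

fn main() {
    let v = vec![10, 20, 30];
    // `&Vec<i32>` implements `IntoIterator` (yielding `&i32`), so these two loops are equivalent:
    for e in v.iter() { println!("{}", e); }
    for e in &v { println!("{}", e); }
    // `v` is still usable afterwards, because both loops only borrowed it.
    println!("{} elements", v.len());
}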
-//@ Wait, `&b`? Why that? Well, we implemented `IntoIterator` for `&BigInt`. If we are in a place where `b` is already borrowed, we can -//@ just do `for digit in b`. If however, we own `b`, we have to create a reference to it. Alternatively, we could implement `IntoIterator` -//@ for `BigInt` - which, as already mentioned, would mean that `b` is actually consumed by the iteration, and gone. This can easily happen, -//@ for example, with a `Vec`: Both `Vec` and `&Vec` (and `&mut Vec`) implement `IntoIterator`, so if you do `for e in v`, and `v` has type `Vec`, -//@ then you will obtain ownership of the elements during the iteration - and destroy the vector in the process. We actually did that in -//@ `part01::vec_min`, but we did not care. You can write `for e in &v` or `for e in v.iter()` to avoid this. +//@ Wait, `&b`? Why that? Well, we implemented `IntoIterator` for `&BigInt`. If we are in a place +//@ where `b` is already borrowed, we can just do `for digit in b`. If however, we own `b`, we have +//@ to create a reference to it. Alternatively, we could implement `IntoIterator` for `BigInt` - +//@ which, as already mentioned, would mean that `b` is actually consumed by the iteration, and +//@ gone. +//@ This can easily happen, for example, with a `Vec`: Both `Vec` and `&Vec` (and `&mut Vec`) +//@ implement `IntoIterator`, so if you do `for e in v`, and `v` has type `Vec`, then you will +//@ obtain ownership of the elements during the iteration - and destroy the vector in the process. +//@ We actually did that in `part01::vec_min`, but we did not care. You can write `for e in &v` or +//@ `for e in v.iter()` to avoid this. -//@ [index](main.html) | [previous](part08.html) | [raw source](workspace/src/part09.rs) | [next](part10.html) +//@ [index](main.html) | [previous](part08.html) | [raw source](workspace/src/part09.rs) | +//@ [next](part10.html) diff --git a/src/part10.rs b/src/part10.rs index 0c90369..39270de 100644 --- a/src/part10.rs +++ b/src/part10.rs @@ -9,120 +9,139 @@ use part05::BigInt; //@ our function. In Rust, a natural first attempt to express this is to have a trait for it. // So, let us define a trait that demands that the type provides some method `do_action` on digits. -//@ This immediately raises the question: How do we pass `self` to that function? Owned, shared reference, -//@ or mutable reference? The typical strategy to answer this question is to use the strongest -//@ type that still works. Certainly, passing `self` in owned form does not work: Then the function -//@ would consume `self`, and we could not call it again, on the second digit. So let's go with a mutable reference. +//@ This immediately raises the question: How do we pass `self` to that function? Owned, shared +//@ reference, or mutable reference? The typical strategy to answer this question is to use the +//@ strongest type that still works. Certainly, passing `self` in owned form does not work: Then +//@ the function would consume `self`, and we could not call it again, on the second digit. So +//@ let's go with a mutable reference. trait Action { fn do_action(&mut self, digit: u64); } -// Now we can write a function that takes some `a` of a type `A` such that we can call `do_action` on `a`, passing it every digit. +// Now we can write a function that takes some `a` of a type `A` such that we can call `do_action` +// on `a`, passing it every digit. 
impl BigInt { fn act_v1(&self, mut a: A) { - //@ Remember that the `mut` above is just an annotation to Rust, telling it that we're okay with `a` being mutated. - //@ Calling `do_action` on `a` takes a mutable reference, so mutation could indeed happen. + //@ Remember that the `mut` above is just an annotation to Rust, telling it that we're okay + //@ with `a` being mutated. Calling `do_action` on `a` takes a mutable reference, so + //@ mutation could indeed happen. for digit in self { a.do_action(digit); /*@*/ } } } -//@ As the next step, we need to come up with some action, and write an appropriate implementation of `Action` for it. -//@ So, let's say we want to print every digit, and to make this less boring, we want the digits to be prefixed by some -//@ arbitrary string. `do_action` has to know this string, so we store it in `self`. +//@ As the next step, we need to come up with some action, and write an appropriate implementation +//@ of `Action` for it. So, let's say we want to print every digit, and to make this less boring, +//@ we want the digits to be prefixed by some arbitrary string. `do_action` has to know this +//@ string, so we store it in `self`. struct PrintWithString { prefix: String, } impl Action for PrintWithString { - // Here we perform the actual printing of the prefix and the digit. We're not making use of our ability to - // change `self` here, but we could replace the prefix if we wanted. + // Here we perform the actual printing of the prefix and the digit. We're not making use of our + // ability to change `self` here, but we could replace the prefix if we wanted. fn do_action(&mut self, digit: u64) { println!("{}{}", self.prefix, digit); /*@*/ } } // Finally, this function takes a `BigInt` and a prefix, and prints the digits with the given prefix. -//@ It does so by creating an instance of `PrintWithString`, giving it the prefix, and then passing that to `act_v1`. -//@ Since `PrintWithString` implements `Action`, Rust now knows what to do. +//@ It does so by creating an instance of `PrintWithString`, giving it the prefix, and then passing +//@ that to `act_v1`. Since `PrintWithString` implements `Action`, Rust now knows what to do. fn print_with_prefix_v1(b: &BigInt, prefix: String) { let my_action = PrintWithString { prefix: prefix }; b.act_v1(my_action); } -// Here's a small main function, demonstrating the code above in action. Remember to edit `main.rs` to run it. +// Here's a small main function, demonstrating the code above in action. Remember to edit `main.rs` +// to run it. pub fn main() { let bignum = BigInt::new(1 << 63) + BigInt::new(1 << 16) + BigInt::new(1 << 63); print_with_prefix_v1(&bignum, "Digit: ".to_string()); } // ## Closures -//@ Now, as it turns out, this pattern of describing some form of an action, that can carry in additional data, is very common. -//@ In general, this is called a *closure*. Closures take some arguments and produce a result, and they have an *environment* -//@ they can use, which corresponds to the type `PrintWithString` (or any other type implementing `Action`). Again we have the -//@ choice of passing this environment in owned or borrowed form, so there are three traits for closures in Rust: `Fn`-closures -//@ get a shared reference, `FnMut`-closures get a mutable reference, and `FnOnce`-closures consume their environment (and can hence -//@ be called only once). The syntax for a closure trait which takes arguments of type `T1`, `T2`, ... and returns something -//@ of type `U` is `Fn(T1, T2, ...) -> U`. 
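The three closure traits differ only in how they take the environment. A small self-contained sketch (the `call_*` helpers are made up) shows one closure of each kind:

fn call_fn<F: Fn()>(f: F) { f(); f(); }             // shared access to the environment
fn call_fn_mut<F: FnMut()>(mut f: F) { f(); f(); }  // may mutate the environment
fn call_fn_once<F: FnOnce()>(f: F) { f(); }         // consumes the environment, one call only

fn main() {
    let name = "Rust".to_string();
    call_fn(|| println!("hello, {}", name)); // only reads `name`

    let mut hits = 0;
    call_fn_mut(|| hits += 1); // mutates `hits` through the environment
    println!("{} calls", hits); // prints "2 calls"

    call_fn_once(move || drop(name)); // takes `name` by value and gives it up
}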
- -// This defines `act` very similar to above, but now we demand `A` to be the type of a closure that mutates its borrowed environment, -// takes a digit, and returns nothing. +//@ Now, as it turns out, this pattern of describing some form of an action, that can carry in +//@ additional data, is very common. In general, this is called a *closure*. Closures take some +//@ arguments and produce a result, and they have an *environment* they can use, which corresponds +//@ to the type `PrintWithString` (or any other type implementing `Action`). +//@ Again we have the choice of passing this environment in owned or borrowed form, so there are +//@ three traits for closures in Rust: `Fn`-closures get a shared reference, `FnMut`-closures get a +//@ mutable reference, and `FnOnce`-closures consume their environment (and can hence be called +//@ only once). The syntax for a closure trait which takes arguments of type `T1`, `T2`, ... and +//@ returns something of type `U` is `Fn(T1, T2, ...) -> U`. + +// This defines `act` very similar to above, but now we demand `A` to be the type of a closure that +// mutates its borrowed environment, takes a digit, and returns nothing. impl BigInt { fn act(&self, mut a: A) { for digit in self { - // We can call closures as if they were functions - but really, what's happening here is translated to essentially what we wrote above, in `act_v1`. + // We can call closures as if they were functions - but really, what's happening here + // is translated to essentially what we wrote above, in `act_v1`. a(digit); /*@*/ } } } -// Now that we saw how to write a function that operates on closures, let's see how to write a closure. +// Now that we saw how to write a function that operates on closures, let's see how to write a +// closure. pub fn print_with_prefix(b: &BigInt, prefix: String) { - //@ The syntax for closures is `|arg1, arg2, ...| code`. Notice that the closure can reference variables like `prefix` that it did not - //@ take as argument - variables that happen to be present *outside* of the closure. We say that the closure *captures* - //@ variables. Rust will now automatically create a type (like `PrintWithString`) for the environment of the closure - //@ with fields for every captured variable, implement the closure trait for this type such that the action performed - //@ is given by the code of the closure, and finally it will instantiate the environment type here at the definition site - //@ of the closure and fill it appropriately. + //@ The syntax for closures is `|arg1, arg2, ...| code`. Notice that the closure can reference + //@ variables like `prefix` that it did not take as argument - variables that happen to be + //@ present *outside* of the closure. We say that the closure *captures* variables. + //@ Rust will now automatically create a type (like `PrintWithString`) for the environment of + //@ the closure with fields for every captured variable, implement the closure trait for this + //@ type such that the action performed is given by the code of the closure, and finally it + //@ will instantiate the environment type here at the definition site of the closure and fill + //@ it appropriately. b.act(|digit| println!("{}{}", prefix, digit) ); } -// You can change `main` to call this function, and you should notice - nothing, no difference in behavior. -// But we wrote much less boilerplate code! +// You can change `main` to call this function, and you should notice - nothing, no difference in +// behavior. But we wrote much less boilerplate code! 
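Putting the pieces together outside of `BigInt`: a function generic over `FnMut(i32)` plus two capturing closures, in a self-contained sketch (the `for_each_digit` helper and its data are made up for illustration):

fn for_each_digit<F: FnMut(i32)>(digits: &Vec<i32>, mut f: F) {
    for d in digits {
        f(*d); // calling the closure looks just like calling a function
    }
}

fn main() {
    let digits = vec![1, 2, 3];

    let prefix = "Digit: ".to_string();
    // This closure captures `prefix`; Rust builds the environment type for us.
    for_each_digit(&digits, |d| println!("{}{}", prefix, d));

    // Because the bound is `FnMut`, a closure may also mutate what it captures.
    let mut sum = 0;
    for_each_digit(&digits, |d| sum += d);
    println!("sum = {}", sum); // prints "sum = 6"
}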
-// Remember that we decided to use the `FnMut` trait above? This means our closure could actually mutate its environment. -// For example, we can use that to count the digits as they are printed. +// Remember that we decided to use the `FnMut` trait above? This means our closure could actually +// mutate its environment. For example, we can use that to count the digits as they are printed. pub fn print_and_count(b: &BigInt) { let mut count: usize = 0; - //@ This time, the environment will contain a field of type `&mut usize`, that will be initialized with a mutable reference of - //@ `count`. The closure, since it mutably borrows its environment, is able to access this field and mutate `count` - //@ through it. Once `act` returns, the closure is destroyed and `count` is no longer borrowed. Because closures compile down - //@ to normal types, all the borrow checking continues to work as usually, and we cannot accidentally leak a closure somewhere - //@ that still contains, in its environment, a dead reference. + //@ This time, the environment will contain a field of type `&mut usize`, that will be + //@ initialized with a mutable reference of `count`. The closure, since it mutably borrows its + //@ environment, is able to access this field and mutate `count` through it. Once `act` + //@ returns, the closure is destroyed and `count` is no longer borrowed. + //@ Because closures compile down to normal types, all the borrow checking continues to work as + //@ usually, and we cannot accidentally leak a closure somewhere that still contains, in its + //@ environment, a dead reference. b.act(|digit| { println!("{}: {}", count, digit); count = count +1; } ); println!("There are {} digits", count); } // ## Fun with iterators and closures -//@ If you are familiar with functional languages, you are probably aware that one can have lots of fun with iterators and closures. -//@ Rust provides a whole lot of methods on iterators that allow us to write pretty functional-style list manipulation. +//@ If you are familiar with functional languages, you are probably aware that one can have lots of +//@ fun with iterators and closures. Rust provides a whole lot of methods on iterators that allow +//@ us to write pretty functional-style list manipulation. -// Let's say we want to write a function that increments every entry of a `Vec` by some number, then looks for numbers larger than some threshold, and prints them. +// Let's say we want to write a function that increments every entry of a `Vec` by some number, +// then looks for numbers larger than some threshold, and prints them. fn inc_print_threshold(v: &Vec, offset: i32, threshold: i32) { - //@ `map` takes a closure that is applied to every element of the iterator. `filter` removes elements - //@ from the iterator that do not pass the test given by the closure. + //@ `map` takes a closure that is applied to every element of the iterator. `filter` removes + //@ elements from the iterator that do not pass the test given by the closure. //@ - //@ Since all these closures compile down to the pattern described above, there is actually no heap allocation going on here. This makes - //@ closures very efficient, and it makes optimization fairly trivial: The resulting code will look like you hand-rolled the loop in C. + //@ Since all these closures compile down to the pattern described above, there is actually no + //@ heap allocation going on here. 
This makes closures very efficient, and it makes + //@ optimization fairly trivial: The resulting code will look like you hand-rolled the loop in + //@ C. for i in v.iter().map(|n| *n + offset).filter(|n| *n > threshold) { println!("{}", i); } } -// Sometimes it is useful to know both the position of some element in a list, and its value. That's where the `enumerate` function helps. +// Sometimes it is useful to know both the position of some element in a list, and its value. +// That's where the `enumerate` function helps. fn print_enumerated(v: &Vec) { - //@ `enumerate` turns an iterator over `T` into an iterator over `(usize, T)`, where the first element just counts the position in the iterator. - //@ We can do pattern matching right in the loop header to obtain names for both the position, and the value. + //@ `enumerate` turns an iterator over `T` into an iterator over `(usize, T)`, where the first + //@ element just counts the position in the iterator. We can do pattern matching right in the + //@ loop header to obtain names for both the position, and the value. for (i, t) in v.iter().enumerate() { println!("Position {}: {}", i, t); } @@ -130,15 +149,20 @@ fn print_enumerated(v: &Vec) { // And as a final example, one can also collect all elements of an iterator, and put them, e.g., in a vector. fn filter_vec_by_divisor(v: &Vec, divisor: i32) -> Vec { - //@ Here, the return type of `collect` is inferred based on the return type of our function. In general, it can return anything implementing - //@ [`FromIterator`](https://doc.rust-lang.org/stable/std/iter/trait.FromIterator.html). Notice that `iter` gives us an iterator over - //@ borrowed `i32`, but we want to own them for the result, so we insert a `map` to dereference. + //@ Here, the return type of `collect` is inferred based on the return type of our function. In + //@ general, it can return anything implementing + //@ [`FromIterator`](https://doc.rust-lang.org/stable/std/iter/trait.FromIterator.html). + //@ Notice that `iter` gives us an iterator over borrowed `i32`, but we want to own them for + //@ the result, so we insert a `map` to dereference. v.iter().map(|n| *n).filter(|n| *n % divisor == 0).collect() /*@*/ } -// **Exercise 10.1**: Look up the [documentation of `Iterator`](https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html) to learn about more functions -// that can act on iterators. Try using some of them. What about a function that sums the even numbers of an iterator? Or a function that computes the -// product of those numbers that sit at odd positions? A function that checks whether a vector contains a certain number? Whether all numbers are -// smaller than some threshold? Be creative! +// **Exercise 10.1**: Look up the +// [documentation of `Iterator`](https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html) +// to learn about more functions that can act on iterators. Try using some of them. What about a +// function that sums the even numbers of an iterator? Or a function that computes the product of +// those numbers that sit at odd positions? A function that checks whether a vector contains a +// certain number? Whether all numbers are smaller than some threshold? Be creative! 
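A few more adapters in the same style, as a self-contained sketch on a plain vector (made-up numbers; `max`, `rev`, `take` and `fold` are further `Iterator` methods from the standard library):

fn main() {
    let v = vec![3, 1, 4, 1, 5];

    // `max` consumes the iterator and returns an `Option<&i32>`, since it could be empty.
    println!("{:?}", v.iter().max()); // prints "Some(5)"

    // `rev` walks a (double-ended) iterator backwards, `take` stops after a fixed count.
    for n in v.iter().rev().take(2) {
        println!("{}", n); // prints 5, then 1
    }

    // `fold` threads an accumulator through all elements.
    let sum_of_squares = v.iter().fold(0, |acc, n| acc + n * n);
    println!("{}", sum_of_squares); // 9 + 1 + 16 + 1 + 25 = 52
}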
-//@ [index](main.html) | [previous](part09.html) | [raw source](workspace/src/part10.rs) | [next](part11.html) +//@ [index](main.html) | [previous](part09.html) | [raw source](workspace/src/part10.rs) | +//@ [next](part11.html) diff --git a/src/part11.rs b/src/part11.rs index 757f205..a088e77 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -2,34 +2,43 @@ // ====================================================== //@ We will play around with closures a bit more. Let us implement some kind of generic "callback" -//@ mechanism, providing two functions: Registering a new callback, and calling all registered callbacks. +//@ mechanism, providing two functions: Registering a new callback, and calling all registered +//@ callbacks. -//@ First of all, we need to find a way to store the callbacks. Clearly, there will be a `Vec` involved, so that we can -//@ always grow the number of registered callbacks. A callback will be a closure, i.e., something implementing -//@ `FnMut(i32)` (we want to call this multiple times, so clearly `FnOnce` would be no good). So our first attempt may be the following. +//@ First of all, we need to find a way to store the callbacks. Clearly, there will be a `Vec` +//@ involved, so that we can always grow the number of registered callbacks. A callback will be a +//@ closure, i.e., something implementing `FnMut(i32)` (we want to call this multiple times, so +//@ clearly `FnOnce` would be no good). So our first attempt may be the following. // For now, we just decide that the callbacks have an argument of type `i32`. struct CallbacksV1 { callbacks: Vec, } -//@ However, this will not work. Remember how the "type" of a closure is specific to the environment of captured variables. Different closures -//@ all implementing `FnMut(i32)` may have different types. However, a `Vec` is a *uniformly typed* vector. +//@ However, this will not work. Remember how the "type" of a closure is specific to the +//@ environment of captured variables. Different closures all implementing `FnMut(i32)` may have +//@ different types. However, a `Vec` is a *uniformly typed* vector. -//@ We will thus need a way to store things of *different* types in the same vector. We know all these types implement `FnMut(i32)`. For this scenario, -//@ Rust provides *trait objects*: The truth is, `FnMut(i32)` is not just a trait. It is also a type, that can be given to anything implementing -//@ this trait. So, we may write the following. +//@ We will thus need a way to store things of *different* types in the same vector. We know all +//@ these types implement `FnMut(i32)`. For this scenario, Rust provides *trait objects*: The truth +//@ is, `FnMut(i32)` is not just a trait. It is also a type, that can be given to anything +//@ implementing this trait. So, we may write the following. /* struct CallbacksV2 { callbacks: Vec, } */ -//@ But, Rust complains about this definition. It says something about "Sized". What's the trouble? See, for many things we want to do, it is crucial that -//@ Rust knows the precise, fixed size of the type - that is, how large this type will be when represented in memory. For example, for a `Vec`, the -//@ elements are stored one right after the other. How should that be possible, without a fixed size? The point is, `FnMut(i32)` could be of any size. -//@ We don't know how large that "type that implements `FnMut(i32)`" is. Rust calls this an *unsized* type. Whenever we introduce a type variable, Rust -//@ will implicitly add a bound to that variable, demanding that it is sized. 
That's why we did not have to worry about this so far.
-//@ You can opt-out of this implicit bound by saying `T: ?Sized`. Then `T` may or may not be sized. +//@ But, Rust complains about this definition. It says something about "Sized". What's the trouble? +//@ See, for many things we want to do, it is crucial that Rust knows the precise, fixed size of +//@ the type - that is, how large this type will be when represented in memory. For example, for a +//@ `Vec`, the elements are stored one right after the other. How should that be possible, without +//@ a fixed size? The point is, `FnMut(i32)` could be of any size. We don't know how large that +//@ "type that implements `FnMut(i32)`" is. Rust calls this an *unsized* type. +//@ Whenever we introduce a type variable, Rust will implicitly add a bound to that variable, +//@ demanding that it is sized. That's why we did not have to worry about this so far.
You +//@ can opt-out of this implicit bound by saying `T: ?Sized`. Then `T` may or may not be sized. -//@ So, what can we do, if we can't store the callbacks in a vector? We can put them in a box. Semantically, `Box` is a lot like `T`: You fully own -//@ the data stored there. On the machine, however, `Box` is a *pointer* to a heap-allocated `T`. It is a lot like `std::unique_ptr` in C++. In our current example, -//@ the important bit is that since it's a pointer, `T` can be unsized, but `Box` itself will always be sized. So we can put it in a `Vec`. +//@ So, what can we do, if we can't store the callbacks in a vector? We can put them in a box. +//@ Semantically, `Box` is a lot like `T`: You fully own the data stored there. On the machine, +//@ however, `Box` is a *pointer* to a heap-allocated `T`. It is a lot like `std::unique_ptr` in +//@ C++. In our current example, the important bit is that since it's a pointer, `T` can be +//@ unsized, but `Box` itself will always be sized. So we can put it in a `Vec`. pub struct Callbacks { callbacks: Vec>, } @@ -45,16 +54,19 @@ impl Callbacks { self.callbacks.push(callback); } - // We can also write a generic version of `register`, such that it will be instantiated with some concrete closure type `F` - // and do the creation of the `Box` and the conversion from `F` to `FnMut(i32)` itself. + // We can also write a generic version of `register`, such that it will be instantiated with + // some concrete closure type `F` and do the creation of the `Box` and the conversion from `F` + // to `FnMut(i32)` itself. - //@ For this to work, we need to demand that the type `F` does not contain any short-lived references. After all, we will store it - //@ in our list of callbacks indefinitely. If the closure contained a pointer to our caller's stackframe, that pointer - //@ could be invalid by the time the closure is called. We can mitigate this by bounding `F` by a *lifetime*: `F: 'a` says - //@ that all data of type `F` will *outlive* (i.e., will be valid for at least as long as) lifetime `'a`. + //@ For this to work, we need to demand that the type `F` does not contain any short-lived + //@ references. After all, we will store it in our list of callbacks indefinitely. If the + //@ closure contained a pointer to our caller's stackframe, that pointer could be invalid by + //@ the time the closure is called. We can mitigate this by bounding `F` by a *lifetime*: `F: + //@ 'a` says that all data of type `F` will *outlive* (i.e., will be valid for at least as long + //@ as) lifetime `'a`. //@ Here, we use the special lifetime `'static`, which is the lifetime of the entire program. - //@ The same bound has been implicitly added in the version of `register` above, and in the definition of - //@ `Callbacks`. + //@ The same bound has been implicitly added in the version of `register` above, and in the + //@ definition of `Callbacks`. pub fn register_generic(&mut self, callback: F) { self.callbacks.push(Box::new(callback)); /*@*/ } @@ -63,15 +75,18 @@ impl Callbacks { pub fn call(&mut self, val: i32) { // Since they are of type `FnMut`, we need to mutably iterate. for callback in self.callbacks.iter_mut() { - //@ Here, `callback` has type `&mut Box`. We can make use of the fact that `Box` is a *smart pointer*: In - //@ particular, we can use it as if it were a normal reference, and use `*` to get to its contents. Then we obtain a - //@ mutable reference to these contents, because we call a `FnMut`. + //@ Here, `callback` has type `&mut Box`. 
We can make use of the fact that + //@ `Box` is a *smart pointer*: In particular, we can use it as if it were a normal + //@ reference, and use `*` to get to its contents. Then we obtain a mutable reference + //@ to these contents, because we call a `FnMut`. (&mut *callback)(val); /*@*/ - //@ Just like it is the case with normal references, this typically happens implicitly with smart pointers, so we can also directly call the function. + //@ Just like it is the case with normal references, this typically happens implicitly + //@ with smart pointers, so we can also directly call the function. //@ Try removing the `&mut *`. //@ - //@ The difference to a reference is that `Box` implies full ownership: Once you drop the box (i.e., when the entire `Callbacks` instance is - //@ dropped), the content it points to on the heap will be deleted. + //@ The difference to a reference is that `Box` implies full ownership: Once you drop + //@ the box (i.e., when the entire `Callbacks` instance is dropped), the content it + //@ points to on the heap will be deleted. } } } @@ -83,11 +98,14 @@ pub fn main() { c.call(0); { - //@ We can even register callbacks that modify their environment. Per default, Rust will attempt to capture a reference to `count`, to borrow it. However, - //@ that doesn't work out this time. Remember the `'static` bound above? Borrowing `count` in the environment would - //@ violate that bound, as the reference is only valid for this block. If the callbacks are triggered later, we'd be in trouble. - //@ We have to explicitly tell Rust to `move` ownership of the variable into the closure. Its environment will then contain a - //@ `usize` rather than a `&mut usize`, and the closure has no effect on this local variable anymore. + //@ We can even register callbacks that modify their environment. Per default, Rust will + //@ attempt to capture a reference to `count`, to borrow it. However, that doesn't work out + //@ this time. Remember the `'static` bound above? Borrowing `count` in the environment + //@ would violate that bound, as the reference is only valid for this block. If the + //@ callbacks are triggered later, we'd be in trouble. + //@ We have to explicitly tell Rust to `move` ownership of the variable into the closure. + //@ Its environment will then contain a `usize` rather than a `&mut usize`, and the closure + //@ has no effect on this local variable anymore. let mut count: usize = 0; c.register_generic(move |val| { count = count+1; @@ -98,23 +116,31 @@ pub fn main() { } //@ ## Run-time behavior -//@ When you run the program above, how does Rust know what to do with the callbacks? Since an unsized type lacks some information, -//@ a *pointer* to such a type (be it a `Box` or a reference) will need to complete this information. We say that pointers to -//@ trait objects are *fat*. They store not only the address of the object, but (in the case of trait objects) also a *vtable*: A -//@ table of function pointers, determining the code that's run when a trait method is called. There are some restrictions for traits to be usable -//@ as trait objects. This is called *object safety* and described in [the documentation](https://doc.rust-lang.org/stable/book/trait-objects.html) and [the reference](https://doc.rust-lang.org/reference.html#trait-objects). -//@ In case of the `FnMut` trait, there's only a single action to be performed: Calling the closure. You can thus think of a pointer to `FnMut` as -//@ a pointer to the code, and a pointer to the environment. 
This is how Rust recovers the typical encoding of closures as a special case of a more -//@ general concept. +//@ When you run the program above, how does Rust know what to do with the callbacks? Since an +//@ unsized type lacks some information, a *pointer* to such a type (be it a `Box` or a reference) +//@ will need to complete this information. We say that pointers to trait objects are *fat*. They +//@ store not only the address of the object, but (in the case of trait objects) also a *vtable*: A +//@ table of function pointers, determining the code that's run when a trait method is called. +//@ There are some restrictions for traits to be usable as trait objects. This is called *object +//@ safety* and described in [the documentation](https://doc.rust-lang.org/stable/book/trait- +//@ objects.html) and [the reference](https://doc.rust-lang.org/reference.html#trait-objects). +//@ In case of the `FnMut` trait, there's only a single action to be performed: Calling the +//@ closure. You can thus think of a pointer to `FnMut` as a pointer to the code, and a pointer to +//@ the environment. This is how Rust recovers the typical encoding of closures as a special case +//@ of a more general concept. //@ -//@ Whenever you write a generic function, you have a choice: You can make it generic, like `register_generic`. Or you -//@ can use trait objects, like `register`. The latter will result in only a single compiled version (rather -//@ than one version per type it is instantiated with). This makes for smaller code, but you pay the overhead of the virtual function calls. -//@ (Of course, in the case of `register` above, there's no function called on the trait object.) -//@ Isn't it beautiful how traits can nicely handle this tradeoff (and much more, as we saw, like closures and operator overloading)? +//@ Whenever you write a generic function, you have a choice: You can make it generic, like +//@ `register_generic`. Or you can use trait objects, like `register`. The latter will result in +//@ only a single compiled version (rather than one version per type it is instantiated with). This +//@ makes for smaller code, but you pay the overhead of the virtual function calls. (Of course, in +//@ the case of `register` above, there's no function called on the trait object.) +//@ Isn't it beautiful how traits can nicely handle this tradeoff (and much more, as we saw, like +//@ closures and operator overloading)? -// **Exercise 11.1**: We made the arbitrary choice of using `i32` for the arguments. Generalize the data structures above -// to work with an arbitrary type `T` that's passed to the callbacks. Since you need to call multiple callbacks with the -// same `val: T` (in our `call` function), you will either have to restrict `T` to `Copy` types, or pass a reference. +// **Exercise 11.1**: We made the arbitrary choice of using `i32` for the arguments. Generalize the +// data structures above to work with an arbitrary type `T` that's passed to the callbacks. Since +// you need to call multiple callbacks with the same `val: T` (in our `call` function), you will +// either have to restrict `T` to `Copy` types, or pass a reference. 
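As a supplement to the run-time behavior described above, the following free-standing sketch (written with today's `dyn Trait` spelling for trait objects, which the tutorial's Rust version wrote without `dyn`) shows that pointers to trait objects really are fat, i.e., twice the size of an ordinary pointer on a typical platform:

    use std::mem::size_of;

    fn main() {
        // A reference to a sized type is a thin pointer; a reference to a trait object
        // additionally carries a vtable pointer, making it a fat pointer.
        println!("&u8:              {} bytes", size_of::<&u8>());
        println!("&dyn Fn(i32):     {} bytes", size_of::<&dyn Fn(i32)>());
        println!("Box<dyn Fn(i32)>: {} bytes", size_of::<Box<dyn Fn(i32)>>());
    }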
-//@ [index](main.html) | [previous](part10.html) | [raw source](workspace/src/part11.rs) | [next](part12.html) +//@ [index](main.html) | [previous](part10.html) | [raw source](workspace/src/part11.rs) | +//@ [next](part12.html) diff --git a/src/part12.rs b/src/part12.rs index c2331ea..e7ccf99 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -4,22 +4,25 @@ use std::rc::Rc; use std::cell::{Cell, RefCell}; -//@ Our generic callback mechanism is already working quite nicely. However, there's one point we may want to fix: -//@ `Callbacks` does not implement `Clone`. The problem is that closures (or rather, their environment) can never be cloned. +//@ Our generic callback mechanism is already working quite nicely. However, there's one point we +//@ may want to fix: `Callbacks` does not implement `Clone`. The problem is that closures (or +//@ rather, their environment) can never be cloned. //@ (There's not even an automatic derivation happening for the cases where it would be possible.) //@ This restriction propagates up to `Callbacks` itself. What could we do about this? //@ ## `Rc` -//@ The solution is to find some way of cloning `Callbacks` without cloning the environments. This can be achieved with -//@ `Rc`, a *reference-counted* pointer. This is is another example of a smart pointer. You can `clone` an `Rc` as often -//@ as you want, that doesn't affect the data it contains. It only creates more references to the same data. Once all the -//@ references are gone, the data is deleted. +//@ The solution is to find some way of cloning `Callbacks` without cloning the environments. This +//@ can be achieved with `Rc`, a *reference-counted* pointer. This is is another example of a +//@ smart pointer. You can `clone` an `Rc` as often as you want, that doesn't affect the data it +//@ contains. It only creates more references to the same data. Once all the references are gone, +//@ the data is deleted. //@ //@ Wait a moment, you may say here. Multiple references to the same data? That's aliasing! Indeed: //@ Once data is stored in an `Rc`, it is read-only and you can only ever get a shared reference to the data again. -//@ Because of this read-only restriction, we cannot use `FnMut` here: We'd be unable to call the function with a mutable reference -//@ to it's environment! So we have to go with `Fn`. We wrap that in an `Rc`, and then Rust happily derives `Clone` for us. +//@ Because of this read-only restriction, we cannot use `FnMut` here: We'd be unable to call the +//@ function with a mutable reference to it's environment! So we have to go with `Fn`. We wrap that +//@ in an `Rc`, and then Rust happily derives `Clone` for us. #[derive(Clone)] struct Callbacks { callbacks: Vec>, @@ -55,17 +58,22 @@ pub fn main() { } // ## Interior Mutability -//@ Of course, the counting example from last time does not work anymore: It needs to mutate the environment, which a `Fn` -//@ cannot do. The strict borrowing Rules of Rust are getting into our way. However, when it comes to mutating a mere number -//@ (`usize`), there's not really any chance of problems coming up. Everybody can read and write that variable just as they want. -//@ So it would be rather sad if we were not able to write this program. Lucky enough, Rust's standard library provides a -//@ solution in the form of `Cell`. This type represents a memory cell of some type `T`, providing the two basic operations -//@ `get` and `set`. `get` returns a *copy* of the content of the cell, so all this works only if `T` is `Copy`. 
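A minimal, self-contained illustration of `Cell` (independent of the callback code above): both `get` and `set` only need a shared reference, so the aliasing in this snippet is perfectly fine:

    use std::cell::Cell;

    fn main() {
        let count = Cell::new(0);
        let alias = &count;                  // a second, shared handle to the same cell
        alias.set(alias.get() + 1);          // mutation through a shared reference
        count.set(count.get() + 1);
        println!("count = {}", count.get()); // count = 2
    }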
-//@ `set`, which overrides the content, only needs a *shared reference* to the cell. The phenomenon of a type that permits mutation through -//@ shared references (i.e., mutation despite the possibility of aliasing) is called *interior mutability*. You can think -//@ of `set` changing only the *contents* of the cell, not its *identity*. In contrast, the kind of mutation we saw so far was -//@ about replacing one piece of data by something else of the same type. This is called *inherited mutability*.
-//@ Notice that it is impossible to *borrow* the contents of the cell, and that is actually the key to why this is safe. +//@ Of course, the counting example from last time does not work anymore: It needs to mutate the +//@ environment, which a `Fn` cannot do. The strict borrowing Rules of Rust are getting into our +//@ way. However, when it comes to mutating a mere number (`usize`), there's not really any chance +//@ of problems coming up. Everybody can read and write that variable just as they want. +//@ So it would be rather sad if we were not able to write this program. Lucky enough, Rust's +//@ standard library provides a solution in the form of `Cell`. This type represents a memory +//@ cell of some type `T`, providing the two basic operations `get` and `set`. `get` returns a +//@ *copy* of the content of the cell, so all this works only if `T` is `Copy`. +//@ `set`, which overrides the content, only needs a *shared reference* to the cell. The phenomenon +//@ of a type that permits mutation through shared references (i.e., mutation despite the +//@ possibility of aliasing) is called *interior mutability*. You can think of `set` changing only +//@ the *contents* of the cell, not its *identity*. In contrast, the kind of mutation we saw so far +//@ was about replacing one piece of data by something else of the same type. This is called +//@ *inherited mutability*.
+//@ Notice that it is impossible to *borrow* the contents of the cell, and that is actually the key +//@ to why this is safe. // So, let us put our counter in a `Cell`, and replicate the example from the previous part. fn demo_cell(c: &mut Callbacks) { @@ -73,9 +81,10 @@ fn demo_cell(c: &mut Callbacks) { let count = Cell::new(0); // Again, we have to move ownership of the `count` into the environment closure. c.register(move |val| { - // In here, all we have is a shared reference of our environment. But that's good enough for the `get` and `set` of the cell! - //@ At run-time, the `Cell` will be almost entirely compiled away, so this becomes pretty much equivalent to the version - //@ we wrote in the previous part. + // In here, all we have is a shared reference of our environment. But that's good enough + // for the `get` and `set` of the cell! + //@ At run-time, the `Cell` will be almost entirely compiled away, so this becomes + //@ pretty much equivalent to the version we wrote in the previous part. let new_count = count.get()+1; count.set(new_count); println!("Callback 2: {} ({}. time)", val, new_count); @@ -85,26 +94,31 @@ fn demo_cell(c: &mut Callbacks) { c.call(2); c.clone().call(3); } -//@ It is worth mentioning that `Rc` itself also has to make use of interior mutability: When you `clone` an `Rc`, all it has available -//@ is a shared reference. However, it has to increment the reference count! Internally, `Rc` uses `Cell` for the count, such that it -//@ can be updated during `clone`. +//@ It is worth mentioning that `Rc` itself also has to make use of interior mutability: When you +//@ `clone` an `Rc`, all it has available is a shared reference. However, it has to increment the +//@ reference count! Internally, `Rc` uses `Cell` for the count, such that it can be updated during +//@ `clone`. //@ -//@ Putting it all together, the story around mutation and ownership through references looks as follows: There are *unique* references, -//@ which - because of their exclusivity - are always safe to mutate through. And there are *shared* references, where the compiler cannot -//@ generally promise that mutation is safe. However, if extra circumstances guarantee that mutation *is* safe, then it can happen even -//@ through a shared reference - as we saw with `Cell`. +//@ Putting it all together, the story around mutation and ownership through references looks as +//@ follows: There are *unique* references, which - because of their exclusivity - are always safe +//@ to mutate through. And there are *shared* references, where the compiler cannot generally +//@ promise that mutation is safe. However, if extra circumstances guarantee that mutation *is* +//@ safe, then it can happen even through a shared reference - as we saw with `Cell`. // ## `RefCell` -//@ As the next step in the evolution of `Callbacks`, we could try to solve this problem of mutability once and for all, by adding `Cell` -//@ to `Callbacks` such that clients don't have to worry about this. However, that won't end up working: Remember that `Cell` only works -//@ with types that are `Copy`, which the environment of a closure will never be. We need a variant of `Cell` that allows borrowing its -//@ contents, such that we can provide a `FnMut` with its environment. But if `Cell` would allow that, we could write down all those -//@ crashing C++ programs that we wanted to get rid of. 
+//@ As the next step in the evolution of `Callbacks`, we could try to solve this problem of +//@ mutability once and for all, by adding `Cell` to `Callbacks` such that clients don't have to +//@ worry about this. However, that won't end up working: Remember that `Cell` only works with +//@ types that are `Copy`, which the environment of a closure will never be. We need a variant of +//@ `Cell` that allows borrowing its contents, such that we can provide a `FnMut` with its +//@ environment. But if `Cell` would allow that, we could write down all those crashing C++ +//@ programs that we wanted to get rid of. //@ -//@ This is the point where our program got too complex for Rust to guarantee at compile-time that nothing bad will happen. Since we don't -//@ want to give up the safety guarantee, we are going to need some code that actually checks at run-time that the borrowing rules -//@ are not violated. Such a check is provided by `RefCell`: Unlike `Cell`, this lets us borrow the contents, and it works for -//@ non-`Copy` `T`. But, as we will see, it incurs some run-time overhead. +//@ This is the point where our program got too complex for Rust to guarantee at compile-time that +//@ nothing bad will happen. Since we don't want to give up the safety guarantee, we are going to +//@ need some code that actually checks at run-time that the borrowing rules are not violated. Such +//@ a check is provided by `RefCell`: Unlike `Cell`, this lets us borrow the contents, and it +//@ works for non-`Copy` `T`. But, as we will see, it incurs some run-time overhead. // Our final version of `Callbacks` puts the closure environment into a `RefCell`. #[derive(Clone)] @@ -124,24 +138,33 @@ impl CallbacksMut { pub fn call(&mut self, val: i32) { for callback in self.callbacks.iter() { - // We have to *explicitly* borrow the contents of a `RefCell` by calling `borrow` or `borrow_mut`. - //@ At run-time, the cell will keep track of the number of outstanding shared and mutable references, - //@ and panic if the rules are violated.
- //@ For this check to be performed, `closure` is a *guard*: Rather than a normal reference, `borrow_mut` returns - //@ a smart pointer ([`RefMut`](https://doc.rust-lang.org/stable/std/cell/struct.RefMut.html), in this case) that waits until is goes out of scope, and then - //@ appropriately updates the number of active references. + // We have to *explicitly* borrow the contents of a `RefCell` by calling `borrow` or + // `borrow_mut`. + //@ At run-time, the cell will keep track of the number of outstanding shared and + //@ mutable references, and panic if the rules are violated.
+ //@ For this check to be performed, `closure` is a *guard*: Rather than a normal + //@ reference, `borrow_mut` returns a smart pointer ([`RefMut`](https://doc.rust- + //@ lang.org/stable/std/cell/struct.RefMut.html), in this case) that waits until is + //@ goes out of scope, and then appropriately updates the number of active references. //@ - //@ Since `call` is the only place that borrows the environments of the closures, we should expect that - //@ the check will always succeed, as is actually entirely useless. However, this is not actually true. Several different `CallbacksMut` could share - //@ a callback (as they were created with `clone`), and calling one callback here could trigger calling - //@ all callbacks of the other `CallbacksMut`, which would end up calling the initial callback again. This issue of functions accidentally recursively calling - //@ themselves is called *reentrancy*, and it can lead to subtle bugs. Here, it would mean that the closure runs twice, each time thinking it has a - //@ unique, mutable reference to its environment - so it may end up dereferencing a dangling pointer. Ouch! Lucky enough, - //@ Rust detects this at run-time and panics once we try to borrow the same environment again. I hope this also makes it - //@ clear that there's absolutely no hope of Rust performing these checks statically, at compile-time: It would have to detect reentrancy! + //@ Since `call` is the only place that borrows the environments of the closures, we + //@ should expect that the check will always succeed, as is actually entirely useless. + //@ However, this is not actually true. Several different `CallbacksMut` could share a + //@ callback (as they were created with `clone`), and calling one callback here could + //@ trigger calling all callbacks of the other `CallbacksMut`, which would end up + //@ calling the initial callback again. + //@ This issue of functions accidentally recursively calling themselves is called + //@ *reentrancy*, and it can lead to subtle bugs. Here, it would mean that the closure + //@ runs twice, each time thinking it has a unique, mutable reference to its + //@ environment - so it may end up dereferencing a dangling pointer. Ouch! + //@ Lucky enough, Rust detects this at run-time and panics once we try to borrow the + //@ same environment again. I hope this also makes it clear that there's absolutely no + //@ hope of Rust performing these checks statically, at compile-time: It would have to + //@ detect reentrancy! let mut closure = callback.borrow_mut(); - // Unfortunately, Rust's auto-dereference of pointers is not clever enough here. We thus have to explicitly - // dereference the smart pointer and obtain a mutable reference to the content. + // Unfortunately, Rust's auto-dereference of pointers is not clever enough here. We + // thus have to explicitly dereference the smart pointer and obtain a mutable reference + // to the content. (&mut *closure)(val); } } @@ -163,7 +186,9 @@ fn demo_mut(c: &mut CallbacksMut) { c.call(1); c.clone().call(2); } -// **Exercise 12.1**: Write some piece of code using only the available, public interface of `CallbacksMut` such that a reentrant call to a closure -// is happening, and the program panics because the `RefCell` refuses to hand out a second mutable borrow of the closure's environment. 
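To see the run-time check in isolation (independent of `CallbacksMut`; the names here are made up), consider this short sketch. The second `borrow_mut` in `main` is exactly the kind of violation described above: it cannot be rejected at compile-time, so it panics at run-time instead:

    use std::cell::RefCell;

    fn main() {
        let cell = RefCell::new(vec![1, 2, 3]);
        {
            let mut guard = cell.borrow_mut(); // dynamically checked, exclusive borrow
            guard.push(4);
        }                                      // `guard` dropped here, borrow released
        println!("{:?}", cell.borrow());       // [1, 2, 3, 4]

        let _first = cell.borrow_mut();
        let _second = cell.borrow_mut();       // panics: already mutably borrowed
    }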
+// **Exercise 12.1**: Write some piece of code using only the available, public interface of +// `CallbacksMut` such that a reentrant call to a closure is happening, and the program panics +// because the `RefCell` refuses to hand out a second mutable borrow of the closure's environment. -//@ [index](main.html) | [previous](part11.html) | [raw source](workspace/src/part12.rs) | [next](part13.html) +//@ [index](main.html) | [previous](part11.html) | [raw source](workspace/src/part12.rs) | +//@ [next](part13.html) diff --git a/src/part13.rs b/src/part13.rs index 054a006..31f629b 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -6,15 +6,18 @@ use std::{io, fs, thread}; use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; use std::sync::Arc; -//@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", -//@ called *rgrep*, and it is going to perform three jobs concurrently: One thread reads the input files, one thread does -//@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that -//@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common -//@ pitfall of concurrent programming: data races. We will see how that works concretely. - -// Before we come to the actual code, we define a data-structure `Options` to store all the information we need -// to complete the job: Which files to work on, which pattern to look for, and how to output. -//@ Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +//@ Our next stop are the concurrency features of Rust. We are going to write our own small version +//@ of "grep", called *rgrep*, and it is going to perform three jobs concurrently: One thread reads +//@ the input files, one thread does the actual matching, and one thread writes the output. I +//@ already mentioned in the beginning of the course that Rust's type system (more precisely, the +//@ discipline of ownership and borrowing) will help us to avoid a common pitfall of concurrent +//@ programming: data races. We will see how that works concretely. + +// Before we come to the actual code, we define a data-structure `Options` to store all the +// information we need to complete the job: Which files to work on, which pattern to look for, and +// how to output. +//@ Besides just printing all the matching lines, we will also offer to count them, or +//@ alternatively to sort them. #[derive(Clone,Copy)] pub enum OutputMode { Print, @@ -29,17 +32,19 @@ pub struct Options { pub output_mode: OutputMode, } -//@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. -//@ To get the data from one thread to the next, we will use *message passing*: We will establish communication -//@ channels between the threads, with one thread *sending* data, and the other one *receiving* it. `SyncSender` -//@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here -//@ means that the `send` operation could block, waiting for the other side to make progress. We don't want to -//@ end up with the entire file being stored in the buffer of the channels, and the output not being fast enough -//@ to keep up with the speed of input. +//@ Now we can write three functions to do the actual job of reading, matching, and printing, +//@ respectively. 
To get the data from one thread to the next, we will use *message passing*: We +//@ will establish communication channels between the threads, with one thread *sending* data, and +//@ the other one *receiving* it. `SyncSender` is the type of the sending end of a synchronous +//@ channel transmitting data of type `T`. +//@ *Synchronous* here means that the `send` operation could block, waiting for the other side to +//@ make progress. We don't want to end up with the entire file being stored in the buffer of the +//@ channels, and the output not being fast enough to keep up with the speed of input. //@ -//@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would -//@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between -//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. +//@ We also need all the threads to have access to the options of the job they are supposed to do. +//@ Since it would be rather unnecessary to actually copy these options around, we will use +//@ reference-counting to share them between all threads. `Arc` is the thread-safe version of `Rc`, +//@ using atomic operations to keep the reference count up-to-date. // The first function reads the files, and sends every line over the `out_channel`. fn read_files(options: Arc, out_channel: SyncSender) { @@ -64,15 +69,17 @@ fn filter_lines(options: Arc, out_channel: SyncSender) { // We can simply iterate over the channel, which will stop when the channel is closed. for line in in_channel.iter() { - // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. This is another example of Rust using traits as substitute for overloading. + // `contains` works on lots of types of patterns, but in particular, we can use it to test + // whether one string is contained in another. This is another example of Rust using traits + // as substitute for overloading. if line.contains(&options.pattern) { out_channel.send(line).unwrap(); /*@*/ } } } -// The third function performs the output operations, receiving the relevant lines on its `in_channel`. +// The third function performs the output operations, receiving the relevant lines on its +// `in_channel`. fn output_lines(options: Arc, in_channel: Receiver) { match options.output_mode { Print => { @@ -82,13 +89,14 @@ fn output_lines(options: Arc, in_channel: Receiver) { } }, Count => { - // We are supposed to count the number of matching lines. There's a convenient iterator adapter that - // we can use for this job. + // We are supposed to count the number of matching lines. There's a convenient iterator + // adapter that we can use for this job. let count = in_channel.iter().count(); /*@*/ println!("{} hits for {}.", count, options.pattern); /*@*/ }, SortAndPrint => { - // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... + // We are asked to sort the matching lines before printing. So let's collect them all + // in a local vector... let mut data: Vec = in_channel.iter().collect(); // ...and implement the actual sorting later. unimplemented!() @@ -96,20 +104,21 @@ fn output_lines(options: Arc, in_channel: Receiver) { } } -// With the operations of the three threads defined, we can now implement a function that performs grepping according -// to some given options. 
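The following free-standing sketch shows the same producer/consumer pattern in miniature (the names and the filtering logic are illustrative, not rgrep's): a bounded channel connects two threads, and dropping the sender ends the receiver's iteration:

    use std::sync::mpsc::sync_channel;
    use std::thread;

    fn main() {
        // A synchronous channel with a buffer of 16: `send` blocks once the buffer is
        // full, so a fast producer cannot run arbitrarily far ahead of a slow consumer.
        let (sender, receiver) = sync_channel(16);

        let producer = thread::spawn(move || {
            for i in 0..100 {
                sender.send(format!("line {}", i)).unwrap();
            }
            // `sender` is dropped here, which closes the channel.
        });
        let consumer = thread::spawn(move || {
            // Iteration ends once the channel is closed and drained.
            for line in receiver.iter() {
                if line.contains("9") {
                    println!("{}", line);
                }
            }
        });

        producer.join().unwrap();
        consumer.join().unwrap();
    }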
+// With the operations of the three threads defined, we can now implement a function that performs +// grepping according to some given options. pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); - // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly + // filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. - //@ The `move` keyword again tells Rust that we want ownership of captured variables to be moved into the - //@ closure. This means we need to do the `clone` *first*, otherwise we would lose our `options` to the - //@ new thread! + //@ The `move` keyword again tells Rust that we want ownership of captured variables to be + //@ moved into the closure. This means we need to do the `clone` *first*, otherwise we would + //@ lose our `options` to the new thread! let options1 = options.clone(); let handle1 = thread::spawn(move || read_files(options1, line_sender)); @@ -124,9 +133,9 @@ pub fn run(options: Options) { let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); // Finally, wait until all three threads did their job. - //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this case, we could get - //@ access to the data that it provided to `panic!`. Here, we just assert that they did not panic - so we will panic ourselves - //@ if that happened. + //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this + //@ case, we could get access to the data that it provided to `panic!`. Here, we just assert + //@ that they did not panic - so we will panic ourselves if that happened. handle1.join().unwrap(); handle2.join().unwrap(); handle3.join().unwrap(); @@ -145,48 +154,59 @@ pub fn main() { run(options); } -// **Exercise 13.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file -// and the number of the line in the file. You will have to change the type of the channels from `String` to something -// that records this extra information. +// **Exercise 13.1**: Change rgrep such that it prints not only the matching lines, but also the +// name of the file and the number of the line in the file. You will have to change the type of the +// channels from `String` to something that records this extra information. //@ ## Ownership, Borrowing, and Concurrency -//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust has closures, -//@ that should not be entirely surprising. However, as it turns out, Rust goes well beyond this and actually ensures -//@ the absence of data races.
-//@ A data race is typically defined as having two concurrent, unsynchronized -//@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in -//@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded -//@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against -//@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to -//@ the character data will actually be moved around (just like when a `String` is passed to a function with full ownership). However, -//@ `read_files` has to *give up* ownership of the string to perform `send`, to it is impossible for the string to still be borrowed. -//@ After it sent the string to the other side, `read_files` has no pointer into the string content -//@ anymore, and hence no way to race on the data with someone else. +//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust +//@ has closures, that should not be entirely surprising. However, as it turns out, Rust goes well +//@ beyond this and actually ensures the absence of data races.
+//@ A data race is typically defined as having two concurrent, unsynchronized accesses to the same +//@ memory location, at least one of which is a write. In other words, a data race is mutation in +//@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism +//@ that makes our single-threaded programs memory safe, and that prevents us from invalidating +//@ iterators, also helps secure our multi-threaded code against data races. For example, notice +//@ how `read_files` sends a `String` to `filter_lines`. +//@ At run-time, only the pointer to the character data will actually be moved around (just like +//@ when a `String` is passed to a function with full ownership). However, `read_files` has to +//@ *give up* ownership of the string to perform `send`, to it is impossible for the string to +//@ still be borrowed. After it sent the string to the other side, `read_files` has no pointer into +//@ the string content anymore, and hence no way to race on the data with someone else. //@ -//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous parts, to make -//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning -//@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use -//@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` -//@ to be valid for lifetime `'static`, as you can see in [its documentation](https://doc.rust-lang.org/stable/std/thread/fn.spawn.html). -//@ This avoids another kind of data race, where the thread's access races with the callee deallocating its stack frame. -//@ It is only thanks to the concept of lifetimes that this can be expressed as part of the type of `spawn`. +//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the +//@ previous parts, to make sure that the callbacks do not reference any pointers that might become +//@ invalid? This is just as crucial for spawning a thread: In general, that thread could last for +//@ much longer than the current stack frame. Thus, it must not use any pointers to data in that +//@ stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` to be +//@ valid for lifetime `'static`, as you can see in +//@ [its documentation](https://doc.rust-lang.org/stable/std/thread/fn.spawn.html). This avoids +//@ another kind of data race, where the thread's access races with the callee deallocating its +//@ stack frame. It is only thanks to the concept of lifetimes that this can be expressed as part +//@ of the type of `spawn`. //@ ## Send -//@ However, the story goes even further. I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations -//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` across multiple threads, or the reference count may -//@ become invalid. And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something -//@ is wrong. That's great, of course, but how did it do that? +//@ However, the story goes even further. I said above that `Arc` is a thread-safe version of `Rc`, +//@ which uses atomic operations to manipulate the reference count. It is thus crucial that we +//@ don't use `Rc` across multiple threads, or the reference count may become invalid. 
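Here is a free-standing sketch of sharing across threads with `Arc`, together with the `Rc` variant that the compiler rejects (the variable names are made up; the following paragraphs explain why the rejection happens):

    use std::rc::Rc;
    use std::sync::Arc;
    use std::thread;

    fn main() {
        // A clone of the `Arc` can be moved into the spawned thread.
        let shared = Arc::new(vec![1, 2, 3]);
        let shared_for_thread = shared.clone();
        let handle = thread::spawn(move || println!("{:?}", shared_for_thread));
        handle.join().unwrap();

        // With `Rc`, the closure's environment may not cross thread boundaries;
        // uncommenting the `spawn` call below yields a compile-time error.
        let local_only = Rc::new(vec![1, 2, 3]);
        // thread::spawn(move || println!("{:?}", local_only));
        println!("{:?}", local_only);
    }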
And indeed, +//@ if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that +//@ something is wrong. That's great, of course, but how did it do that? //@ -//@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in -//@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, -//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send` can be -//@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, -//@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! If had two `Rc` to the same data, and -//@ sent one of them to another thread, things could go havoc due to the lack of synchronization. +//@ The answer is already hinted at in the error: It will say something about `Send`. You may have +//@ noticed that the closure in `thread::spawn` does not just have a `'static` bound, but also has +//@ to satisfy `Send`. `Send` is a trait, and just like `Copy`, it's just a marker - there are no +//@ functions provided by `Send`. What the trait says is that types which are `Send` can be safely +//@ sent to another thread without causing trouble. +//@ Of course, all the primitive data-types are `Send`. So is `Arc`, which is why Rust accepted our +//@ code. But `Rc` is not `Send`, and for a good reason! If had two `Rc` to the same data, and sent +//@ one of them to another thread, things could go havoc due to the lack of synchronization. //@ -//@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This means that *every type* implements -//@ `Send`, unless it opts out. Opting out is viral: If your type contains a type that opted out, then you don't have `Send`, either. -//@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every -//@ captured variable *is* `Send`, then so is the entire environment, and you are good. - -//@ [index](main.html) | [previous](part12.html) | [raw source](workspace/src/part13.rs) | [next](part14.html) +//@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This +//@ means that *every type* implements `Send`, unless it opts out. Opting out is viral: If your +//@ type contains a type that opted out, then you don't have `Send`, either. So if the environment +//@ of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If +//@ however every captured variable *is* `Send`, then so is the entire environment, and you are +//@ good. + +//@ [index](main.html) | [previous](part12.html) | [raw source](workspace/src/part13.rs) | +//@ [next](part14.html) diff --git a/src/part14.rs b/src/part14.rs index 5c00905..059d1b5 100644 --- a/src/part14.rs +++ b/src/part14.rs @@ -1,104 +1,123 @@ // Rust-101, Part 14: Slices, Arrays, External Dependencies // ======================================================== -//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options -//@ as argument to the program, rather than hard-coding them. Let's start with sorting. +//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job +//@ options as argument to the program, rather than hard-coding them. Let's start with sorting. 
// ## Slices -//@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to -//@ pass it a `Vec`. Of course, sorting does not actually consume the argument, so we should make that a `&mut Vec`. -//@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, -//@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. -//@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the -//@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. - -//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous -//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for -//@ trait objects, this means we can only operate on pointers to that type, and these pointers will carry the missing -//@ information - namely, the length (they will be *fat pointers*). Such a reference to an array is called a *slice*. As we will see, a slice can be split. -//@ Our function can thus take a mutable slice, and promise to sort all elements in there. +//@ Again, we first have to think about the type we want to give to our sorting function. We may be +//@ inclined to pass it a `Vec`. Of course, sorting does not actually consume the argument, so +//@ we should make that a `&mut Vec`. But there's a problem with that: If we want to implement +//@ some divide-and-conquer sorting algorithm (say, Quicksort), then we will have to *split* our +//@ argument at some point, and operate recursively on the two parts. But we can't split a `Vec`! +//@ We could now extend the function signature to also take some indices, marking the part of the +//@ vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. + +//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that +//@ there's a contiguous region of memory, where a bunch of `T` are stored. How many? We can't +//@ tell! This is an unsized type. Just like for trait objects, this means we can only operate on +//@ pointers to that type, and these pointers will carry the missing information - namely, the +//@ length (they will be *fat pointers*). Such a reference to an array is called a *slice*. As we +//@ will see, a slice can be split. Our function can thus take a mutable slice, and promise to sort +//@ all elements in there. pub fn sort(data: &mut [T]) { if data.len() < 2 { return; } - // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, - // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. + // We decide that the element at 0 is our pivot, and then we move our cursors through the rest + // of the slice, making sure that everything on the left is no larger than the pivot, and + // everything on the right is no smaller. let mut lpos = 1; let mut rpos = data.len(); /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 14.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a - // test function for `sort`. + // **Exercise 14.1**: Complete this Quicksort loop. You can use `swap` on slices to swap + // two elements. 
Write a test function for `sort`. unimplemented!() } // Once our cursors met, we need to put the pivot in the right place. data.swap(0, lpos-1); - // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: - //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one at the beginning and - //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. - //@ Since the two slices don't overlap, there is no aliasing and we can have both of them as unique, mutable slices. + // Finally, we split our slice to sort the two halves. The nice part about slices is that + // splitting them is cheap: + //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one + //@ at the beginning and one in the middle, and set the lengths appropriately such that they + //@ don't overlap. This is what `split_at_mut` does. Since the two slices don't overlap, there + //@ is no aliasing and we can have both of them as unique, mutable slices. let (part1, part2) = data.split_at_mut(lpos); - //@ The index operation can not only be used to address certain elements, it can also be used for *slicing*: Giving a range - //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from - //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. + //@ The index operation can not only be used to address certain elements, it can also be used + //@ for *slicing*: Giving a range of indices, and obtaining an appropriate part of the slice we + //@ started with. Here, we remove the last element from `part1`, which is the pivot. This makes + //@ sure both recursive calls work on strictly smaller slices. sort(&mut part1[..lpos-1]); /*@*/ sort(part2); /*@*/ } -// **Exercise 14.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part -// to call the sort function above. If you did exercise 13.1, you will have slightly more work. Make sure you sort by the matched line -// only, not by filename or line number! +// **Exercise 14.2**: Since `String` implements `PartialEq`, you can now change the function +// `output_lines` in the previous part to call the sort function above. If you did exercise 13.1, +// you will have slightly more work. Make sure you sort by the matched line only, not by filename +// or line number! // Now, we can sort, e.g., an vector of numbers. fn sort_nums(data: &mut Vec) { - //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which means we want to slice the entire vector. - //@ It is then passed to the `sort` function, which doesn't even know that it is working on data inside a vector. + //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which + //@ means we want to slice the entire vector. It is then passed to the `sort` function, which + //@ doesn't even know that it is working on data inside a vector. sort(&mut data[..]); } // ## Arrays -//@ An *array* in Rust is given by the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; 10]` is an array of 10 floating-point -//@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also -//@ borrow them as slices, e.g., to sort them. 
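A short, self-contained sketch of borrowing an array as a slice and splitting it, as discussed above (the concrete numbers are only for illustration):

    fn main() {
        let mut data = [3, 1, 2, 5, 4];
        // Borrow the whole array as a slice: just a pointer plus a length.
        let full: &mut [i32] = &mut data[..];
        // Split into two non-overlapping mutable slices; since there is no aliasing,
        // both halves can be mutated independently.
        let (left, right) = full.split_at_mut(2);
        left.swap(0, 1);
        right.sort();
        println!("{:?} {:?}", left, right); // [1, 3] [2, 4, 5]
    }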
+//@ An *array* in Rust is given by the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; +//@ 10]` is an array of 10 floating-point numbers, all one right next to the other in memory. +//@ Arrays are sized, and hence can be used like any other type. But we can also borrow them as +//@ slices, e.g., to sort them. fn sort_array() { let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; sort(&mut array_of_data); } // ## External Dependencies -//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on -//@ [`std::env::args`](https://doc.rust-lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become -//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from -//@ other people's work in your program. +//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command- +//@ line. We could now directly work on [`std::env::args`](https://doc.rust- +//@ lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become +//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show +//@ how easy it is to benefit from other people's work in your program. //@ -//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. Someone must have written a library -//@ for the job, right? Indeed, someone has. Rust has a central repository of published libraries, called [crates.io](https://crates.io/). -//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): Everybody can upload their code, -//@ and there's tooling for importing that code into your project. This tooling is provided by `cargo`, the tool we are already using to -//@ build this tutorial. (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the documentation](http://doc.crates.io/crates-io.html) for more details.) -//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which creates a parser for command-line -//@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file. - -//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace -//@ now, and enable the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, -//@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on. -//@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local -//@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). - -// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. -// Remove the attribute of the `rgrep` module to enable compilation. +//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. +//@ Someone must have written a library for the job, right? Indeed, someone has. Rust has a central +//@ repository of published libraries, called [crates.io](https://crates.io/). 
+//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): +//@ Everybody can upload their code, and there's tooling for importing that code into your project. +//@ This tooling is provided by `cargo`, the tool we are already using to build this tutorial. +//@ (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the +//@ documentation](http://doc.crates.io/crates-io.html) for more details.) +//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which +//@ creates a parser for command-line arguments based on the usage string. External dependencies +//@ are declared in the `Cargo.toml` file. + +//@ I already prepared that file, but the declaration of the dependency is still commented out. So +//@ please open `Cargo.toml` of your workspace now, and enable the two commented-out lines. Then do +//@ `cargo build`. Cargo will now download the crate from crates.io, compile it, and link it to +//@ your program. In the future, you can do `cargo update` to make it download new versions of +//@ crates you depend on. +//@ Note that crates.io is only the default location for dependencies, you can also give it the URL +//@ of a git repository or some local path. All of this is explained in the +//@ [Cargo Guide](http://doc.crates.io/guide.html). + +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` +// is linked. Remove the attribute of the `rgrep` module to enable compilation. #[cfg(feature = "disabled")] pub mod rgrep { - // Now that `docopt` is linked, we can first add it to the namespace with `extern crate` and then import shorter names with `use`. - // We also import some other pieces that we will need. + // Now that `docopt` is linked, we can first add it to the namespace with `extern crate` and + // then import shorter names with `use`. We also import some other pieces that we will need. extern crate docopt; use self::docopt::Docopt; use part13::{run, Options, OutputMode}; use std::process; - // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. + // The `USAGE` string documents how the program is to be called. It's written in a format that + // `docopt` can parse. static USAGE: &'static str = " Usage: rgrep [-c] [-s] ... @@ -109,12 +128,15 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // This parses `argv` and exit the program with an error message if it fails. The code is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
- //@ The function `and_then` takes a closure from `T` to `Result`, and uses it to transform a `Result` to a - //@ `Result`. This way, we can chain computations that only happen if the previous one succeeded (and the error - //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. - //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but rather than panicking in - //@ case of an `Err`, it calls the closure. + // This parses `argv` and exit the program with an error message if it fails. The code is + // taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
+ //@ The function `and_then` takes a closure from `T` to `Result`, and uses it to + //@ transform a `Result` to a `Result`. This way, we can chain computations + //@ that only happen if the previous one succeeded (and the error type has to stay the + //@ same). In case you know about monads, this style of programming will be familiar to + //@ you. + //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but + //@ rather than panicking in case of an `Err`, it calls the closure. let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out. let count = args.get_bool("-c"); @@ -127,12 +149,14 @@ Options: } // We need to make the strings owned to construct the `Options` instance. - //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 - //@ encoded string, that is, a bunch of bytes in memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` - //@ stores the address of the character data, and their length. String literals like "this one" are - //@ of type `&'static str`: They point right to the constant section of the binary, so - //@ the reference is valid for the entire program. The bytes pointed to by `pattern`, on the other hand, are owned by someone else, - //@ and we call `to_string` on it to copy the string data into a buffer on the heap that we own. + //@ If you check all the types carefully, you will notice that `pattern` above is of type + //@ `&str`. `str` is the type of a UTF-8 encoded string, that is, a bunch of bytes in + //@ memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` stores the + //@ address of the character data, and their length. + //@ String literals like "this one" are of type `&'static str`: They point right to the + //@ constant section of the binary, so the reference is valid for the entire program. The + //@ bytes pointed to by `pattern`, on the other hand, are owned by someone else, and we + //@ call `to_string` on it to copy the string data into a buffer on the heap that we own. let mode = if count { OutputMode::Count } else if sort { @@ -147,16 +171,22 @@ Options: } } - // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. - // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! + // Finally, we can call the `run` function from the previous part on the options extracted using + // `get_options`. Edit `main.rs` to call this function. + // You can now use `cargo run -- ` to call your program, and see the argument + // parser and the threads we wrote previously in action! pub fn main() { run(get_options()); /*@*/ } } -// **Exercise 14.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular -// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch -// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. -// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) 
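To make the `Result` chaining used in `get_options` above a bit more concrete, here is a minimal, self-contained sketch of the same `and_then`/`unwrap_or_else` pattern. It does not use `docopt`; the functions `parse_port` and `halve` are invented purely for illustration.

fn parse_port(s: &str) -> Result<u16, String> {
    s.parse::<u16>().map_err(|e| format!("not a number: {}", e))
}

fn halve(n: u16) -> Result<u16, String> {
    if n % 2 == 0 { Ok(n / 2) } else { Err(format!("{} is odd", n)) }
}

fn main() {
    // `and_then` only runs the closure if the previous step returned `Ok`;
    // the error type stays the same along the whole chain.
    let port = parse_port("8080")
        .and_then(halve)
        .unwrap_or_else(|e| {
            // Like `unwrap`, but instead of panicking we handle the `Err` case ourselves.
            println!("falling back to a default: {}", e);
            4040
        });
    println!("{}", port);
}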
+// **Exercise 14.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a
+// crate that does all the parsing and matching on regular expressions; it's called
+// [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace,
+// add an option ("-r") to switch the pattern to regular-expression mode, and change `filter_lines`
+// to honor this option. The documentation of regex is available from its crates.io site.
+// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust.
+// But it wouldn't help for our use-case anyway.)

-//@ [index](main.html) | [previous](part13.html) | [raw source](workspace/src/part14.rs) | [next](part15.html)
+//@ [index](main.html) | [previous](part13.html) | [raw source](workspace/src/part14.rs) |
+//@ [next](part15.html)
diff --git a/src/part15.rs b/src/part15.rs
index b1cab06..e646d34 100644
--- a/src/part15.rs
+++ b/src/part15.rs
@@ -5,29 +5,35 @@
 use std::sync::{Arc, Mutex};
 use std::thread;
 use std::time::Duration;

-//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only*
-//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. To implement shared-memory
-//@ concurrency, we need to have aliasing and permutation - following, of course, some strict rules to make sure
-//@ there are no data races. In Rust, shared-memory concurrency is obtained through *interior mutability*,
-//@ which we already discussed in a single-threaded context in part 12.
+//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only
+//@ provide *read-only* access to memory: Since there is aliasing, Rust cannot, in general, permit
+//@ mutation. To implement shared-memory concurrency, we need to have aliasing and mutation -
+//@ following, of course, some strict rules to make sure there are no data races. In Rust,
+//@ shared-memory concurrency is obtained through *interior mutability*, which we already
+//@ discussed in a single-threaded context in part 12.
 //@
 //@ ## `Mutex`
-//@ The most basic type for interior mutability that supports concurrency is [`Mutex<T>`](https://doc.rust-lang.org/stable/std/sync/struct.Mutex.html).
-//@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to specify
-//@ the type of the data that's protected by the mutex, and Rust ensures that the data is *only* accessed
-//@ through the mutex. In other words, "lock data, not code" is actually enforced by the type system, which
-//@ becomes possible because of the discipline of ownership and borrowing.
+//@ The most basic type for interior mutability that supports concurrency is
+//@ [`Mutex<T>`](https://doc.rust-lang.org/stable/std/sync/struct.Mutex.html).
+//@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to
+//@ specify the type of the data that's protected by the mutex, and Rust ensures that the data is
+//@ *only* accessed through the mutex. In other words, "lock data, not code" is actually enforced
+//@ by the type system, which becomes possible because of the discipline of ownership and
+//@ borrowing.
 //@
-//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout:
-//@ That will be `Mutex<usize>`. Of course, we want multiple threads to have access to this `Mutex<usize>`, so we wrap it in an `Arc`.
+//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think
+//@ about our data layout: That will be `Mutex<usize>`. Of course, we want multiple threads to have
+//@ access to this `Mutex<usize>`, so we wrap it in an `Arc`.
 //@
-//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar
-//@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing
-//@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc<Mutex<usize>>`. However,
-//@ is is a locally declared types, so we can give it an inherent implementation and implement traits for it. Since the
-//@ field is private, nobody outside this module can even know the type we are wrapping.
-
-// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter.
+//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence
+//@ of types (similar to how a variant of an `enum` is defined). This is called a *tuple struct*.
+//@ It is often used when constructing a *newtype*, as we do here: `ConcurrentCounter` is
+//@ essentially just a new name for `Arc<Mutex<usize>>`. However, it is a locally declared type,
+//@ so we can give it an inherent implementation and implement traits for it. Since the field is
+//@ private, nobody outside this module can even know the type we are wrapping.
+
+// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about
+// the same counter.
 #[derive(Clone)]
 struct ConcurrentCounter(Arc<Mutex<usize>>);
@@ -39,21 +45,25 @@ impl ConcurrentCounter {
     // The core operation is, of course, `increment`.
     pub fn increment(&self, by: usize) {
-        // `lock` on a mutex returns a guard, very much like `RefCell`. The guard gives access to the data contained in the mutex.
-        //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct.
+        // `lock` on a mutex returns a guard, very much like `RefCell`. The guard gives access to
+        // the data contained in the mutex.
+        //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a
+        //@ tuple or a struct.
         let mut counter = self.0.lock().unwrap();
         //@ The guard is a smart pointer to the content.
         *counter = *counter + by;
         //@ At the end of the function, `counter` is dropped and the mutex is available again.
-        //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us
-        //@ to take a reference to some of its content, release the lock of the mutex, and subsequently access the protected data without holding
-        //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry
-        //@ about data races *even though* we are mutating shared memory!
+        //@ This can only happen when full ownership of the guard is given up. In particular, it is
+        //@ impossible for us to take a reference to some of its content, release the lock of the
+        //@ mutex, and subsequently access the protected data without holding the lock. Enforcing
+        //@ the locking discipline is expressible in the Rust type system, so we don't have to
+        //@ worry about data races *even though* we are mutating shared memory!
         //@
-        //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the
-        //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will fail.
- //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned - //@ state and attempt to recover from it. + //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds + //@ a lock, it could leave the data-structure in a bad state. The lock is hence considered + //@ *poisoned*. Future attempts to `lock` it will fail. + //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we + //@ could have a look at the poisoned state and attempt to recover from it. } // The function `get` returns the current value of the counter. @@ -91,58 +101,74 @@ pub fn main() { println!("Current value: {}", counter.get()); } - // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. + // Finally, we wait for all the threads to finish to be sure we can catch the counter's final + // value. handle1.join().unwrap(); handle2.join().unwrap(); println!("Final value: {}", counter.get()); } -// **Exercise 15.1**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by -// `by` *only if* the current value is `test`. +// **Exercise 15.1**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that +// increments the counter by `by` *only if* the current value is `test`. // -// **Exercise 15.2**: Rather than panicking in case the lock is poisoned, we can use `into_inner` on the error to recover -// the data inside the lock. Change the code above to do that. Try using `unwrap_or_else` for this job. +// **Exercise 15.2**: Rather than panicking in case the lock is poisoned, we can use `into_inner` +// on the error to recover the data inside the lock. Change the code above to do that. Try using +// `unwrap_or_else` for this job. //@ ## `RwLock` -//@ Besides `Mutex`, there's also [`RwLock`](https://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which -//@ provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one -//@ for exclusive write access. Notice that this is the same pattern we already saw with shared vs. mutable references. Hence -//@ another way of explaining `RwLock` is to say that it is like `RefCell`, but works even for concurrent access. Rather than -//@ panicking when the data is already borrowed, `RwLock` will of course block the current thread until the lock is available. -//@ In this view, `Mutex` is a stripped-down version of `RwLock` that does not distinguish readers and writers. - -// **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. +//@ Besides `Mutex`, there's also +//@ [`RwLock`](https://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which provides two +//@ ways of locking: One that grants only read-only access, to any number of concurrent readers, +//@ and another one for exclusive write access. Notice that this is the same pattern we already +//@ saw with shared vs. mutable references. Hence another way of explaining `RwLock` is to say that +//@ it is like `RefCell`, but works even for concurrent access. Rather than panicking when the data +//@ is already borrowed, `RwLock` will of course block the current thread until the lock is +//@ available. In this view, `Mutex` is a stripped-down version of `RwLock` that does not +//@ distinguish readers and writers. 
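Before the exercise, a small, self-contained sketch of the `RwLock` API described above. This is plain standard-library usage, not the counter code itself:

use std::sync::RwLock;

fn main() {
    let data = RwLock::new(0usize);
    {
        // Any number of readers may hold a read guard at the same time.
        let r1 = data.read().unwrap();
        let r2 = data.read().unwrap();
        println!("{} {}", *r1, *r2);
    } // read guards dropped here, releasing the shared lock
    {
        // A write guard is exclusive, like a mutable reference.
        let mut w = data.write().unwrap();
        *w += 1;
    } // write guard dropped here
    println!("{}", *data.read().unwrap());
}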
+ +// **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can +// be executed at the same time. //@ ## `Sync` -//@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` is not prepared for -//@ multiple threads trying to access the data at the same time. How does Rust make sure that we don't accidentally use -//@ `RefCell` across multiple threads? +//@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` +//@ is not prepared for multiple threads trying to access the data at the same time. How does Rust +//@ make sure that we don't accidentally use `RefCell` across multiple threads? //@ -//@ In part 13, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* -//@ talk about the question whether a reference is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. -//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](https://doc.rust-lang.org/stable/std/mem/fn.swap.html) -//@ the contents of two mutable references. So in terms of concurrency, sending a mutable, unique reference is very much like +//@ In part 13, we talked about types that are marked `Send` and thus can be moved to another +//@ thread. However, we did *not* talk about the question whether a reference is `Send`. For `&mut +//@ T`, the answer is: It is `Send` whenever `T` is send. +//@ `&mut` allows moving values back and forth, it is even possible to +//@ [`swap`](https://doc.rust-lang.org/stable/std/mem/fn.swap.html) the contents of two mutable +//@ references. So in terms of concurrency, sending a mutable, unique reference is very much like //@ sending full ownership, in the sense that it can be used to move the object to another thread. //@ -//@ But what about `&T`, a shared reference? Without interior mutability, it would always be all-right to send such values. -//@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the -//@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for -//@ this purpose: `Sync`. A type `T` is `Sync` if and only if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation -//@ and is thus automatically implemented for a data-structure *if* all its members implement it. +//@ But what about `&T`, a shared reference? Without interior mutability, it would always be all- +//@ right to send such values. After all, no mutation can be performed, so there can be as many +//@ threads accessing the data as we like. In the presence of interior mutability though, the story +//@ gets more complicated. Rust introduces another marker trait for this purpose: `Sync`. A type +//@ `T` is `Sync` if and only if `&T` is `Send`. Just like `Send`, `Sync` has a default +//@ implementation and is thus automatically implemented for a data-structure *if* all its members +//@ implement it. //@ -//@ Since `Arc` provides multiple threads with a shared reference to its content, `Arc` is only `Send` if `T` is `Sync`. -//@ So if we had used `RefCell` above, which is *not* `Sync`, Rust would have caught that mistake. Notice however that -//@ `RefCell` *is* `Send`: If ownership of the entire cell is moved to another thread, it is still not possible for several -//@ threads to try to access the data at the same time. 
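One way to see the `Send` and `Sync` rules discussed here in action is to ask the compiler directly. The two helper functions below are made up for illustration; everything else is plain `std`:

use std::cell::RefCell;
use std::sync::Mutex;

fn assert_send<T: Send>() {}
fn assert_sync<T: Sync>() {}

fn main() {
    assert_send::<Mutex<i32>>();   // `Mutex<T>` is `Send`...
    assert_sync::<Mutex<i32>>();   // ...and `Sync`, provided `T` is `Send`.
    assert_send::<RefCell<i32>>(); // `RefCell<T>` is `Send`: moving the whole cell is fine.
    // The next two lines do not compile, which is exactly the point:
    // assert_sync::<RefCell<i32>>();     // `RefCell<T>` is not `Sync`.
    // assert_send::<std::rc::Rc<i32>>(); // `Rc<T>` is neither `Send` nor `Sync`.
}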
+//@ Since `Arc` provides multiple threads with a shared reference to its content, `Arc<T>` is only
+//@ `Send` if `T` is `Sync`. So if we had used `RefCell` above, which is *not* `Sync`, Rust would
+//@ have caught that mistake. Notice however that `RefCell` *is* `Send`: If ownership of the entire
+//@ cell is moved to another thread, it is still not possible for several threads to try to access
+//@ the data at the same time.
 //@
-//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared reference is good enough
-//@ for cloning, and we don't want other threads to clone our local `Rc` (they would race for updating the reference count),
-//@ so it must not be `Sync`. The rule of `Mutex` is to enforce synchronization, so it should not be entirely surprising that
-//@ `Mutex` is `Send` *and* `Sync` provided that `T` is `Send`.
+//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a
+//@ shared reference is good enough for cloning, and we don't want other threads to clone our local
+//@ `Rc` (they would race for updating the reference count), so it must not be `Sync`. The role of
+//@ `Mutex<T>` is to enforce synchronization, so it should not be entirely surprising that
+//@ `Mutex<T>` is `Send` *and* `Sync` provided that `T` is `Send`.
 //@
-//@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples
-//@ of such types, but that's not a topic I want to go into. In case you are curious, there's a
-//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`.
-//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic.
-
-//@ [index](main.html) | [previous](part14.html) | [raw source](workspace/src/part15.rs) | [next](part16.html)
+//@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed
+//@ rather esoteric examples of such types, but that's not a topic I want to go into. In case you
+//@ are curious, there's a
+//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md),
+//@ which contains a type `RcMut` that would be `Sync` and not `Send`.
+//@ You may also be interested in this
+//@ [blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic.
+
+//@ [index](main.html) | [previous](part14.html) | [raw source](workspace/src/part15.rs) |
+//@ [next](part16.html)
diff --git a/src/part16.rs b/src/part16.rs
index f0707aa..876df27 100644
--- a/src/part16.rs
+++ b/src/part16.rs
@@ -5,26 +5,32 @@
 use std::ptr;
 use std::mem;
 use std::marker::PhantomData;

-//@ As we saw, the rules Rust imposes to ensure memory safety can get us pretty far. A large amount of programming patterns
-//@ can be written within safe Rust, and, more importantly, library code like iterators or threads can make
-//@ use of the type system to ensure some level of correctness beyond basic memory safety.
+//@ As we saw, the rules Rust imposes to ensure memory safety can get us pretty far. A large number
+//@ of programming patterns can be written within safe Rust, and, more importantly, library code
+//@ like iterators or threads can make use of the type system to ensure some level of correctness
+//@ beyond basic memory safety.
//@ -//@ However, there will still be programs that one cannot write in accordance with the borrow checker. And there -//@ will be cases where it may be possible to satisfy the compiler, but only at the cost of some run-time overhead, -//@ as we saw with `RefCell` - overhead which may not be acceptable. In such a situation, it is possible to -//@ use *unsafe* Rust: That's a part of the language that is *known* to open the gate to invalid pointer access -//@ and all other sorts of memory safety. Of course, `unsafe` also means "Here Be Dragons": You are on your own now.
-//@ The goal in these cases is to confine unsafety to the local module. Types like `Rc` and `Vec` are implemented
-//@ using unsafe Rust, but *using* them as we did is (believed to be) perfectly safe.
+//@ However, there will still be programs that one cannot write in accordance with the borrow
+//@ checker. And there will be cases where it may be possible to satisfy the compiler, but only at
+//@ the cost of some run-time overhead, as we saw with `RefCell` - overhead which may not be
+//@ acceptable. In such a situation, it is possible to use *unsafe* Rust: That's a part of the
+//@ language that is *known* to open the gate to invalid pointer access and all other sorts of
+//@ memory safety violations. Of course, `unsafe` also means "Here Be Dragons": You are on your
+//@ own now.
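As a tiny, self-contained illustration of what the `unsafe` gate unlocks (this is not code from the tutorial, just a sketch):

fn main() {
    let mut x = 42;
    // Creating a raw pointer from a reference is safe...
    let p: *mut i32 = &mut x;
    // ...but dereferencing it is not: via `unsafe` we promise that `p` is valid
    // and not used in a conflicting way.
    unsafe {
        *p += 1;
    }
    println!("{}", x); // prints 43
}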
+//@ The goal in these cases is to confine unsafety to the local module. Types like `Rc` and `Vec` +//@ are implemented using unsafe Rust, but *using* them as we did is (believed to be) perfectly +//@ safe. //@ //@ ## Unsafe Code -//@ As an example, let us write a doubly-linked list. Clearly, such a data-structure involves aliasing and mutation: -//@ Every node in the list is pointed to by its left and right neighbor, but still we will want to modify the nodes. We -//@ could now try some clever combination of `Rc` and `RefCell`, but this would end up being quite annoying - and it would -//@ incur some overhead. For a low-level data-structure like a doubly-linked list, it makes sense to implement an efficient -//@ version once, that is unsafe internally, but that can be used without any risk by safe client code. - -//@ As usually, we start by defining the types. Everything is parameterized by the type `T` of the data stored in the list. +//@ As an example, let us write a doubly-linked list. Clearly, such a data-structure involves +//@ aliasing and mutation: Every node in the list is pointed to by its left and right neighbor, but +//@ still we will want to modify the nodes. We could now try some clever combination of `Rc` and +//@ `RefCell`, but this would end up being quite annoying - and it would incur some overhead. For a +//@ low-level data-structure like a doubly-linked list, it makes sense to implement an efficient +//@ version once, that is unsafe internally, but that can be used without any risk by safe client +//@ code. + +//@ As usually, we start by defining the types. Everything is parameterized by the type `T` of the +//@ data stored in the list. // A node of the list consists of the data, and two node pointers for the predecessor and successor. struct Node { next: NodePtr, @@ -32,48 +38,62 @@ struct Node { data: T, } // A node pointer is a *mutable raw pointer* to a node. -//@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike references, they do not come with -//@ any guarantees: Raw pointers can be null, or they can point to garbage. They don't have a lifetime, either. +//@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike +//@ references, they do not come with any guarantees: Raw pointers can be null, or they can point +//@ to garbage. They don't have a lifetime, either. type NodePtr = *mut Node; -// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type -// will own data of type `T`. -//@ The type `PhantomData` does not actually store anything in memory - it has size zero. However, logically, -//@ Rust will consider a `T` to be present. In this case, Rust knows that data of type `T` may be dropped -//@ whenever a `LinkedList` is dropped. Dropping has a lot of subtle checks to it, making sure that things can't go -//@ wrong. For this to work, Rust needs to know which types could potentially be dropped. In safe Rust, this can all be inferred -//@ automatically, but here, we just have a `*mut Node`, and we need to tell Rust that we actually own such data and will drop it. -//@ (For more of the glory details, see [this RFC](https://github.com/rust-lang/rfcs/blob/master/text/0769-sound-generic-drop.md).) +// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust +// that this type will own data of type `T`. +//@ The type `PhantomData` does not actually store anything in memory - it has size zero. 
+//@ However, logically, Rust will consider a `T` to be present. In this case, Rust knows that data
+//@ of type `T` may be dropped whenever a `LinkedList<T>` is dropped. Dropping has a lot of subtle
+//@ checks to it, making sure that things can't go wrong. For this to work, Rust needs to know
+//@ which types could potentially be dropped. In safe Rust, this can all be inferred automatically,
+//@ but here, we just have a `*mut Node<T>`, and we need to tell Rust that we actually own such
+//@ data and will drop it.
+//@ (For more of the gory details, see
+//@ [this RFC](https://github.com/rust-lang/rfcs/blob/master/text/0769-sound-generic-drop.md).)
 pub struct LinkedList<T> {
     first: NodePtr<T>,
     last: NodePtr<T>,
     _marker: PhantomData<T>,
 }
-//@ Before we get to the actual linked-list methods, we write two short helper functions converting between mutable raw pointers,
-//@ and boxed data. Both employ `mem::transmute`, which can convert anything to anything, by just re-interpreting the bytes.
-//@ Clearly, that's an unsafe operation and must only be used with great care - or even better, not at all. Seriously.
-//@ If at all possible, you should never use `transmute`.
-//@ We are making the assumption here that a `Box` and a raw pointer have the same representation in memory. In the future, -//@ Rust will [provide](https://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.from_raw) such [operations](https://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.into_raw) in the standard library, but the exact API is still being fleshed out. - -//@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function is not generally safe. -//@ This grants us the unsafe powers for the body of the function: We can dereference raw pointers, and - most importantly - we -//@ can call unsafe functions. (The other unsafe powers won't be relevant here. Go read [The Rustonomicon](https://doc.rust-lang.org/nightly/nomicon/) -//@ if you want to learn all about this, but be warned - That Way Lies Madness.)
-//@ Here, the caller will have to ensure that `r` is a valid pointer, and that nobody else has a pointer to this data. +//@ Before we get to the actual linked-list methods, we write two short helper functions converting +//@ between mutable raw pointers, and boxed data. Both employ `mem::transmute`, which can convert +//@ anything to anything, by just re-interpreting the bytes. +//@ Clearly, that's an unsafe operation and must only be used with great care - or even better, not +//@ at all. Seriously. If at all possible, you should never use `transmute`.
+//@ We are making the assumption here that a `Box` and a raw pointer have the same representation +//@ in memory. In the future, Rust will +//@ [provide](https://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.from_raw) such +//@ [operations](https://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.into_raw) in the +//@ standard library, but the exact API is still being fleshed out. + +//@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function +//@ is not generally safe. This grants us the unsafe powers for the body of the function: We can +//@ dereference raw pointers, and - most importantly - we can call unsafe functions. (The other +//@ unsafe powers won't be relevant here. Go read +//@ [The Rustonomicon](https://doc.rust-lang.org/nightly/nomicon/) if you want to learn all about +//@ this, but be warned - That Way Lies Madness.)
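For reference, the operations linked above have since been stabilized as `Box::into_raw` and `Box::from_raw`. A minimal sketch of the same round trip without `transmute` (an illustration only, not the code this part builds on):

fn main() {
    // Give up ownership of the box and get the raw pointer it wrapped.
    let b: Box<i32> = Box::new(5);
    let raw: *mut i32 = Box::into_raw(b);

    // The caller is now responsible for the allocation. Turning the pointer back into a
    // `Box` reclaims ownership, so the memory is freed normally when `b2` is dropped.
    // This is only sound because `raw` came from `Box::into_raw` and is not used afterwards.
    let b2: Box<i32> = unsafe { Box::from_raw(raw) };
    println!("{}", *b2);
}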
+//@ Here, the caller will have to ensure that `r` is a valid pointer, and that nobody else has a
+//@ pointer to this data.
 unsafe fn raw_into_box<T>(r: *mut T) -> Box<T> {
     mem::transmute(r)
 }

-//@ The case is slightly different for `box_into_raw`: Converting a `Box<T>` to a raw pointer is always safe. It just drops some information.
-//@ Hence we keep the function itself safe, and use an *unsafe block* within the function. This is an (unchecked) promise to the Rust
-//@ compiler, saying that a safe invocation of `box_into_raw` cannot go wrong. We also have the unsafe powers in the unsafe block.
+//@ The case is slightly different for `box_into_raw`: Converting a `Box<T>` to a raw pointer is
+//@ always safe. It just drops some information. Hence we keep the function itself safe, and use an
+//@ *unsafe block* within the function. This is an (unchecked) promise to the Rust compiler, saying
+//@ that a safe invocation of `box_into_raw` cannot go wrong. We also have the unsafe powers in the
+//@ unsafe block.
 fn box_into_raw<T>(b: Box<T>) -> *mut T {
     unsafe { mem::transmute(b) }
 }

 impl<T> LinkedList<T> {
-    // A new linked list just contains null pointers. `PhantomData` is how we construct any `PhantomData<T>`.
+    // A new linked list just contains null pointers. `PhantomData` is how we construct any
+    // `PhantomData<T>`.
     pub fn new() -> Self {
         LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData }
     }
@@ -81,7 +101,8 @@ impl<T> LinkedList<T> {
     // This function adds a new node to the end of the list.
     pub fn push_back(&mut self, t: T) {
         // Create the new node, and make it a raw pointer.
-        //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want the memory that it points to to be deallocated!
+        //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want
+        //@ the memory that it points to to be deallocated!
         let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } );
         let new = box_into_raw(new);
         // Update other pointers to this node.
@@ -92,47 +113,50 @@ impl<T> LinkedList<T> {
         } else {
             debug_assert!(!self.first.is_null());
             // We have to update the `next` pointer of the tail node.
-            //@ Since Rust does not know that a raw pointer actually points to anything, dereferencing such a pointer is
-            //@ an unsafe operation. So this unsafe block promises that the pointer will actually be valid.
+            //@ Since Rust does not know that a raw pointer actually points to anything,
+            //@ dereferencing such a pointer is an unsafe operation. So this unsafe block promises
+            //@ that the pointer will actually be valid.
             unsafe { (*self.last).next = new; }                                 /*@*/
         }
         // Make this the last node.
         self.last = new;
     }

-    // **Exercise 16.1**: Add some more operations to `LinkedList<T>`: `pop_back`, `push_front` and `pop_front`.
-    // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self`
-    // and return `Option<T>`.
+    // **Exercise 16.1**: Add some more operations to `LinkedList<T>`: `pop_back`, `push_front` and
+    // `pop_front`. Add testcases for `push_back` and all of your functions. The `pop` functions
+    // should take `&mut self` and return `Option<T>`.

     // Next, we are going to provide an iterator.
-    //@ This function just creates an instance of `IterMut`, the iterator type which does the actual work.
+    //@ This function just creates an instance of `IterMut`, the iterator type which does the actual
+    //@ work.
pub fn iter_mut(&mut self) -> IterMut { IterMut { next: self.first, _marker: PhantomData } } } -//@ What does the iterator need to store? Strictly speaking, all it needs is the pointer to the next node -//@ that it is going to visit. However, how do we make sure that this pointer remains valid? We have to -//@ get this right ourselves, as we left the safe realms of borrowing and ownership. Remember that the -//@ key ingredient for iterator safety was to tie the lifetime of the iterator to the lifetime of the -//@ reference used for `iter_mut`. We will thus give `IterMut` two parameters: A type parameter specifying -//@ the type of the data, and a lifetime parameter specifying for how long the list was borrowed to the -//@ iterator. - -//@ For Rust to accept the type, we have to add two more annotations. First of all, we have to ensure that -//@ the data in the list lives at least as long as the iterator: If you drop the `T: 'a`, Rust will tell -//@ you to add it back. And secondly, Rust will complain if `'a` is not actually used in the struct. -//@ It doesn't know what it is supposed to do with that lifetime. So we use `PhantomData` again to tell -//@ it that in terms of ownership, this type actually (uniquely) borrows a linked list. This has no -//@ operational effect, but it means that Rust can deduce the intent we had when adding that -//@ seemingly useless lifetime parameter. +//@ What does the iterator need to store? Strictly speaking, all it needs is the pointer to the +//@ next node that it is going to visit. However, how do we make sure that this pointer remains +//@ valid? We have to get this right ourselves, as we left the safe realms of borrowing and +//@ ownership. Remember that the key ingredient for iterator safety was to tie the lifetime of the +//@ iterator to the lifetime of the reference used for `iter_mut`. We will thus give `IterMut` two +//@ parameters: A type parameter specifying the type of the data, and a lifetime parameter +//@ specifying for how long the list was borrowed to the iterator. + +//@ For Rust to accept the type, we have to add two more annotations. First of all, we have to +//@ ensure that the data in the list lives at least as long as the iterator: If you drop the `T: +//@ 'a`, Rust will tell you to add it back. And secondly, Rust will complain if `'a` is not +//@ actually used in the struct. It doesn't know what it is supposed to do with that lifetime. So +//@ we use `PhantomData` again to tell it that in terms of ownership, this type actually (uniquely) +//@ borrows a linked list. This has no operational effect, but it means that Rust can deduce the +//@ intent we had when adding that seemingly useless lifetime parameter. pub struct IterMut<'a, T> where T: 'a { next: NodePtr, _marker: PhantomData<&'a mut LinkedList>, } -//@ When implementing `Iterator` for `IterMut`, the fact that we have the lifetime `'a` around immediately -//@ pays of: We would not even be able to write down the type `Item` without that lifetime. +//@ When implementing `Iterator` for `IterMut`, the fact that we have the lifetime `'a` around +//@ immediately pays of: We would not even be able to write down the type `Item` without that +//@ lifetime. impl<'a, T> Iterator for IterMut<'a, T> { type Item = &'a mut T; @@ -151,35 +175,39 @@ impl<'a, T> Iterator for IterMut<'a, T> { } } -//@ In `next` above, we made crucial use of the assumption that `self.next` is either null or a valid pointer. 
-//@ This only works because if someone tries to delete elements from a list during iteration, we know that the borrow checker -//@ will catch them: If they call `next`, the lifetime `'a` we artificially added to the iterator has to still be -//@ active, which means the mutable reference passed to `iter_mut` is still active, which means nobody can delete -//@ anything from the list. In other words, we make use of the expressive type system of Rust, decorating -//@ our own unsafe implementation with just enough information so that Rust can check *uses* of the linked-list. -//@ If the type system were weaker, we could not write a linked-list like the above with a safe interface! +//@ In `next` above, we made crucial use of the assumption that `self.next` is either null or a +//@ valid pointer. This only works because if someone tries to delete elements from a list during +//@ iteration, we know that the borrow checker will catch them: If they call `next`, the lifetime +//@ `'a` we artificially added to the iterator has to still be active, which means the mutable +//@ reference passed to `iter_mut` is still active, which means nobody can delete anything from the +//@ list. In other words, we make use of the expressive type system of Rust, decorating our own +//@ unsafe implementation with just enough information so that Rust can check *uses* of the linked- +//@ list. If the type system were weaker, we could not write a linked-list like the above with a +//@ safe interface! -// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared references. -// Add testcases for both kinds of iterators. +// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared +// references. Add testcases for both kinds of iterators. // ## `Drop` -//@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is dropped, -//@ nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself has a destructor that needs to -//@ clean up, it is not called for the element remaining in the list. We need to take care of that ourselves. - -//@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a very special trait. -//@ It can only be implemented for *nominal types*, i.e., you cannot implement `Drop` for `&mut T`. You also cannot -//@ restrict the type and lifetime parameters further than the type does - the `Drop` implementation has to apply to *all* instances -//@ of `LinkedList`. +//@ The linked list we wrote is already working quite nicely, but there is one problem: When the +//@ list is dropped, nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself +//@ has a destructor that needs to clean up, it is not called for the element remaining in the +//@ list. We need to take care of that ourselves. + +//@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a +//@ very special trait. It can only be implemented for *nominal types*, i.e., you cannot implement +//@ `Drop` for `&mut T`. You also cannot restrict the type and lifetime parameters further than the +//@ type does - the `Drop` implementation has to apply to *all* instances of `LinkedList`. impl Drop for LinkedList { - // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then - // the destructor of `self` would be called at the end of the function, resulting in endless recursion. 
+    // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own
+    // `self`, because then the destructor of `self` would be called at the end of the function,
+    // resulting in endless recursion.
     fn drop(&mut self) {
         let mut cur_ptr = self.first;
         while !cur_ptr.is_null() {
-            // In the destructor, we just iterate over the entire list, successively obtaining ownership
-            // (`Box`) of every node. When the box is dropped, it will call the destructor on `data` if
-            // necessary, and subsequently free the node on the heap.
+            // In the destructor, we just iterate over the entire list, successively obtaining
+            // ownership (`Box`) of every node. When the box is dropped, it will call the destructor
+            // on `data` if necessary, and subsequently free the node on the heap.
             //@ We call `drop` explicitly here just for documentation purposes.
             let cur = unsafe { raw_into_box(cur_ptr) };
             cur_ptr = cur.next;
@@ -189,12 +217,14 @@ impl<T> Drop for LinkedList<T> {
 }

 // ## The End
-//@ Congratulations! You completed Rust-101. This was the last part of the course. I hope you enjoyed it.
-//@ If you have feedback or want to contribute yourself, please head to the [Rust-101](https://www.ralfj.de/projects/rust-101/) website
-//@ fur further information. The entire course is open-source (under [CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)).
+//@ Congratulations! You completed Rust-101. This was the last part of the course. I hope you
+//@ enjoyed it. If you have feedback or want to contribute yourself, please head to the
+//@ [Rust-101](https://www.ralfj.de/projects/rust-101/) website for further information. The entire
+//@ course is open-source (under [CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)).
 //@
-//@ If you want to do more, the examples you saw in this course provide lots of playground for coming up with your own little
-//@ extensions here and there. The [index](main.html) contains some more links to additional resources you may find useful.
+//@ If you want to do more, the examples you saw in this course provide lots of playground for
+//@ coming up with your own little extensions here and there. The [index](main.html) contains some
+//@ more links to additional resources you may find useful.
 //@ With that, there's only one thing left to say: Happy Rust Hacking!

 //@ [index](main.html) | [previous](part15.html) | [raw source](workspace/src/part16.rs) | next