From: Ralf Jung Date: Tue, 16 Jun 2015 11:45:35 +0000 (+0200) Subject: lots of work on parts 05 and 06 X-Git-Url: https://git.ralfj.de/rust-101.git/commitdiff_plain/e2374eed1c3ae8d0063138ea011e86bbd42473ab?ds=inline lots of work on parts 05 and 06 --- diff --git a/src/main.rs b/src/main.rs index bf1c08c..88a4e96 100644 --- a/src/main.rs +++ b/src/main.rs @@ -64,12 +64,13 @@ // // I suggest you get started with [the first part](part00.html), or jump directly to where you left off: // -// * [Part 00](part00.html) -// * [Part 01](part01.html) -// * [Part 02](part02.html) -// * [Part 03](part03.html) -// * [Part 04](part04.html) -// * [Part 05](part05.html) (WIP) +// * [Part 00: Algebraic datatypes](part00.html) +// * [Part 01: Expressions, Inherent methods](part01.html) +// * [Part 02: Generic types, Traits](part02.html) +// * [Part 03: Input](part03.html) +// * [Part 04: Ownership, Borrowing](part04.html) +// * [Part 05: Clone](part05.html) (WIP) +// * [Part 06: Copy](part06.html) (WIP) // * (to be continued) #![allow(dead_code, unused_imports, unused_variables, unused_mut)] mod part00; @@ -80,6 +81,7 @@ mod part04; mod part05; mod part06; mod part07; +mod part08; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part00.rs b/src/part00.rs index c922007..4deb829 100644 --- a/src/part00.rs +++ b/src/part00.rs @@ -7,6 +7,7 @@ // We are going to make use of the standard library, so let's import that: use std; +// ## Getting started // Let us start by thinking about the *type* of our function. Rust forces us to give the types of // all arguments, and the return type, before we even start writing the body. In the case of our minimum // function, we may be inclined to say that it returns a number. But then we would be in trouble: What's diff --git a/src/part01.rs b/src/part01.rs index 8b14050..dc97d8d 100644 --- a/src/part01.rs +++ b/src/part01.rs @@ -9,6 +9,7 @@ use std; // terms you write down are not just *statements* (executing code), but *expressions* // (returning a value). This applies even to the body of entire functions! +// ## Expression-based programming // For example, consider `sqr`: fn sqr(i: i32) -> i32 { i * i } // Between the curly braces, we are giving the *expression* that computes the return value. @@ -55,6 +56,7 @@ fn vec_min(v: Vec) -> NumberOrNothing { // Now that's already much shorter! Make sure you can go over the code above and actually understand // every step of what's going on. +// ## Inherent implementations // So much for `vec_min`. Let us now reconsider `print_number_or_nothing`. That function // really belongs pretty close to the type `NumberOrNothing`. In C++ or Java, you would // probably make it a method of the type. In Rust, we can achieve something very similar diff --git a/src/part02.rs b/src/part02.rs index d0cbbf7..f81672b 100644 --- a/src/part02.rs +++ b/src/part02.rs @@ -8,6 +8,8 @@ use std; // we want a `CharOrNothing`, and later a `FloatOrNothing`? Certainly we don't // want to re-write the type and all its inherent methods. +// ## Generic datatypes + // The solution to this is called *generics* or *polymorphism* (the latter is Greek, // meaning "many shapes"). You may know something similar from C++ (where it's called // *templates*) or Java, or one of the many functional languages. So here, we define @@ -27,9 +29,9 @@ type NumberOrNothing = SomethingOrNothing; // Go check out its [documentation](http://doc.rust-lang.org/stable/std/option/index.html)! // (And don't worry, there's indeed lots of material mentioned there that we did not cover yet.) +// ## Generic `impl`, Static functions // The types are so similar, that we can provide a generic function to construct a `SomethingOrNothing` // from an `Option`, and vice versa. - // **Exercise 02.1**: Implement such functions! I provided a skeleton of the solution. Here, // `panic!` is another macro. This one terminates execution with the given message. // @@ -58,6 +60,7 @@ fn call_constructor(x: i32) -> SomethingOrNothing { SomethingOrNothing::new(Some(x)) } +// ## Traits // Now that we have a generic `SomethingOrNothing`, wouldn't it be nice to also gave a generic // `vec_min`? Of course, we can't take the minimum of a vector of *any* type. It has to be a type // supporting a `min` operation. Rust calls such properties that we may demand of types *traits*. @@ -108,6 +111,7 @@ pub fn vec_min(v: Vec) -> SomethingOrNothing { // This behavior is similar to C++ templates. The optimizer (Rust is using LLVM) then has all the // information it could want to, e.g., inline function calls. +// ## Trait implementations // To make the function usable with a `Vec`, we implement the `Minimum` trait for `i32`. impl Minimum for i32 { fn min(self, b: Self) -> Self { @@ -115,11 +119,10 @@ impl Minimum for i32 { } } -// In order to run our code and see the result, we again provide a `print` function. -// This also shows that we can have multiple `impl` blocks for the same type (remember -// that `NumberOrNothing` is just a type alias for `SomethingOrNothing`), and we -// can provide some methods only for certain instances of a generic type. -impl NumberOrNothing{ +// We again provide a `print` function. This also shows that we can have multiple `impl` blocks +// for the same type (remember that `NumberOrNothing` is just a type alias for `SomethingOrNothing`), +// and we can provide some methods only for certain instances of a generic type. +impl NumberOrNothing { pub fn print(self) { match self { Nothing => println!("The number is: "), diff --git a/src/part04.rs b/src/part04.rs index 47177a7..0ecfe76 100644 --- a/src/part04.rs +++ b/src/part04.rs @@ -13,10 +13,9 @@ use std::cmp; *first = 1337; // This is bad! } */ -// What's going wrong here? `first` is a pointer into the vector `v`. -// The operation `push_back` may re-allocate the storage for the vector, -// in case the old buffer was full. If that happens, `first` is now -// a dangling pointer, and accessing it can crash the program (or worse). +// What's going wrong here? `first` is a pointer into the vector `v`. The operation `push_back` +// may re-allocate the storage for the vector, in case the old buffer was full. If that happens, +// `first` is now a dangling pointer, and accessing it can crash the program (or worse). // // It turns out that only the combination of two circumstances can lead to such a bug: // *aliasing* and *mutation*. In the code above, we have `first` and the buffer of `v` @@ -24,6 +23,7 @@ use std::cmp; // Therefore, the central principle of the Rust typesystem is to *rule out mutation in the presence // of aliasing*. The core tool to achieve that is the notion of *ownership*. +// ## Ownership // What does that mean in practice? Consider the following example. fn work_on_vector(v: Vec) { /* do something */ } fn ownership_demo() { @@ -37,7 +37,7 @@ fn ownership_demo() { // Passing a `Vec` to `work_on_vector` is considered *transfer of ownership*: Someone used // to own that vector, but now he gave it on to `take` and has no business with it anymore. // -// If you give a book to your friend, you cannot come to his place next day and get the book! +// If you give a book to your friend, you cannot just come to his place next day and get the book! // It's no longer yours. Rust makes sure you don't break this rule. Try enabling the commented // line in `ownership_demo`. Rust will tell you that `v` has been *moved*, which is to say that ownership // has been transferred somewhere else. In this particular case, the buffer storing the data @@ -45,6 +45,7 @@ fn ownership_demo() { // Essentially, ownership rules out aliasing, hence making the kind of problem discussed above // impossible. +// ## Shared borrowing // If you go back to our example with `vec_min`, and try to call that function twice, you will // get the same error. That's because `vec_min` demands that the caller transfers ownership of the // vector. Hence, when `vec_min` finishes, the entire vector is deleted. That's of course not what @@ -62,14 +63,13 @@ fn ownership_demo() { // introduces aliasing, so in order to live up to its promise of safety, Rust does not allow // mutation through a shared borrow. -// So, let's re-write `vec_min` to work on a shared borrow of a vector. In fact, the only -// thing we have to change is the type of the function. `&Vec` says that we need -// a vector, but we won't own it. I also took the liberty to convert the function from -// `SomethingOrNothing` to the standard library type `Option`. +// So, let's re-write `vec_min` to work on a shared borrow of a vector, written `&Vec`. +// I also took the liberty to convert the function from `SomethingOrNothing` to the standard +// library type `Option`. fn vec_min(v: &Vec) -> Option { let mut min = None; for e in v { - // In the loop, `e` now has type `&i32`, so we have to dereference it. + // In the loop, `e` now has type `&i32`, so we have to dereference it to obtain an `i32`. min = Some(match min { None => *e, Some(n) => cmp::min(n, *e) @@ -95,18 +95,20 @@ fn shared_borrow_demo() { // borrow, Rust knows that it cannot mutate `v` in any way. Hence the pointer into the buffer of `v` // that was created before calling `vec_min` remains valid. +// ## Mutable borrowing // There is a second kind of borrow, a *mutable borrow*. As the name suggests, such a borrow permits // mutation, and hence has to prevent aliasing. There can only ever be one mutable borrow to a // particular piece of data. // As an example, consider a function which increments every element of a vector by 1. +// The type `&mut Vec` is the type of mutable borrows of `vec`. Because the borrow is +// mutable, we can change `e` in the loop. fn vec_inc(v: &mut Vec) { for e in v { *e += 1; } } -// The type `&mut Vec` is the type of mutable borrows of `vec`. Because the borrow is -// mutable, we can change `e` in the loop. How can we call this function? +// Here's an example of calling `vec_inc`. fn mutable_borrow_demo() { let mut v = vec![5,4,3,2,1]; /* let first = &v[0]; */ @@ -114,31 +116,18 @@ fn mutable_borrow_demo() { vec_inc(&mut v); /* println!("The first element is: {}", *first); */ } -// `&mut` is the operator to create a mutable borrow. We have to mark `v` as mutable in order -// to create such a borrow. Because the borrow passed to `vec_inc` only lasts as -// long as the function call, we can still call `vec_inc` on the same vector twice: -// The durations of the two borrows do not overlap, so we never have more than one mutable borrow. -// However, we can *not* create a shared borrow that spans a call to `vec_inc`. Just try +// `&mut` is the operator to create a mutable borrow. We have to mark `v` as mutable in order to create such a +// borrow. Because the borrow passed to `vec_inc` only lasts as long as the function call, we can still call +// `vec_inc` on the same vector twice: The durations of the two borrows do not overlap, so we never have more +// than one mutable borrow. However, we can *not* create a shared borrow that spans a call to `vec_inc`. Just try // enabling the commented-out lines, and watch Rust complain. This is because `vec_inc` could mutate // the vector structurally (i.e., it could add or remove elements), and hence the pointer `first` -// could become invalid. +// could become invalid. In other words, Rust keeps us safe from bugs like the one in the C++ snipped above. // // Above, I said that having a mutable borrow excludes aliasing. But if you look at the code above carefully, // you may say: "Wait! Don't the `v` in `mutable_borrow_demo` and the `v` in `vec_inc` alias?" And you are right, // they do. However, the `v` in `mutable_borrow_demo` is not actually usable, it is not *active*: As long as there is an -// outstanding borrow, Rust will not allow you to do anything with `v`. This is, in fact, what -// prevents the creation of a mutable borrow when there already is a shared one, as you witnessed -// when enabling `first` above. - -// This also works the other way around: In `multiple_borrow_demo`, there is already a mutable borrow -// active in the `vec_min` line, so the attempt to create another shared borrow is rejected by the compiler. -fn multiple_borrow_demo() { - let mut v = vec![5,4,3,2,1]; - let first = &mut v[0]; - /* vec_min(&v); */ - *first += 1; - println!("The first element is now: {}", *first); -} +// outstanding borrow, Rust will not allow you to do anything with `v`. // So, to summarize - the ownership and borrowing system of Rust enforces the following three rules: // @@ -149,4 +138,4 @@ fn multiple_borrow_demo() { // As it turns out, combined with the abstraction facilities of Rust, this is a very powerful mechanism // to tackle many problems beyond basic memory safety. You will see some examples for this soon. -// [index](main.html) | [previous](part03.html) | [next](main.html) +// [index](main.html) | [previous](part03.html) | [next](part05.html) diff --git a/src/part05.rs b/src/part05.rs index f031f09..d7cf64a 100644 --- a/src/part05.rs +++ b/src/part05.rs @@ -1,9 +1,7 @@ -// Rust-101, Part 05: Clone, Copy -// ============================== - -use std::cmp; -use std::ops; +// Rust-101, Part 05: Clone +// ======================== +// ## Big Numbers // In the course of the next few parts, we are going to build a data-structure for // computations with *bug* numbers. We would like to not have an upper bound // to how large these numbers can get, with the memory of the machine being the @@ -64,6 +62,7 @@ impl BigInt { } } +// ## Cloning // If you have a close look at the type of `BigInt::from_vec`, you will notice that it // consumes the vector `v`. The caller hence loses access. There is however something // we can do if we don't want that to happen: We can explicitly `clone` the vector, @@ -71,9 +70,12 @@ impl BigInt { // `clone` takes a borrowed vector, and returns a fully owned one. fn clone_demo() { let v = vec![0,1 << 16]; - let b1 = BigInt::from_vec(v.clone()); + let b1 = BigInt::from_vec((&v).clone()); let b2 = BigInt::from_vec(v); } +// Rust has special treatment for methods that borrow its `self` argument (like `clone`, or +// like `test_invariant` above): It is not necessary to explicitly borrow the receiver of the +// method. Hence you could replace `(&v).clone()` by `v.clone()` above. Just try it! // To be clonable is a property of a type, and as such, naturally expressed with a trait. // In fact, Rust already comes with a trait `Clone` for exactly this purpose. We can hence @@ -84,8 +86,8 @@ impl Clone for BigInt { } } // Making a type clonable is such a common exercise that Rust can even help you doing it: -// If you add `#[derive(Clone)]' right in front of the definition of `BigInt`, Rust will -// generate an implementation of `clone` that simply clones all the fields. Try it! +// If you add `#[derive(Clone)]` right in front of the definition of `BigInt`, Rust will +// generate an implementation of `Clone` that simply clones all the fields. Try it! // We can also make the type `SomethingOrNothing` implement `Clone`. However, that // can only work if `T` is `Clone`! So we have to add this bound to `T` when we introduce @@ -100,7 +102,7 @@ impl Clone for SomethingOrNothing { // `Something(v)`, that would indicate that we *own* `v` in the code // after the arrow. That can't work though, we have to leave `v` owned by // whoever called us - after all, we don't even own `self`, we just borrowed it. - // By writing `Something(ref v)`, we just borrow `v` for the duration of the match + // By writing `Something(ref v)`, we borrow `v` for the duration of the match // arm. That's good enough for cloning it. Something(ref v) => Something(v.clone()), } @@ -109,106 +111,36 @@ impl Clone for SomethingOrNothing { // Again, Rust will generate this implementation automatically if you add // `#[derive(Clone)]` right before the definition of `SomethingOrNothing`. -// With `BigInt` being about numbers, we should be able to write a version of `vec_min` -// that computes the minimum of a list of `BigInt`. We start by writing `min` for -// `BigInt`. Now our assumption of having no trailing zeros comes in handy! -impl BigInt { - fn min(self, other: Self) -> Self { - // Just to be sure, we first check that both operands actually satisfy our invariant. - // `debug_assert!` is a macro that checks that its argument (must be of type `bool`) - // is `true`, and panics otherwise. It gets removed in release builds, which you do with - // `cargo build --release`. - // - // If you carefully check the type of `BigInt::test_invariant`, you may be surprised that - // we can call the function this way. Doesn't it take `self` in borrowed form? Indeed, - // the explicit way to do that would be to call `(&other).test_invariant()`. However, the - // `self` argument of a method is treated specially by Rust, and borrowing happens automatically here. - debug_assert!(self.test_invariant() && other.test_invariant()); - // If the lengths of the two numbers differ, we already know which is larger. - if self.data.len() < other.data.len() { - self - } else if self.data.len() > other.data.len() { - other - } else { - // **Exercise 05.1**: Fill in this code. - panic!("Not yet implemented."); - } - } +// ## Mutation + aliasing considered harmful (part 2) +// Now that we know how to borrow a part of an `enum` (like `v` above), there's another example for why we +// have to rule out mutation in the presence of aliasing. First, we define an `enum` that can hold either +// a number, or a string. +enum Variant { + Number(i32), + Text(String), } - -// Now we can write `vec_min`. In order to make it type-check, we have to write it as follows. -fn vec_min(v: &Vec) -> Option { - let mut min: Option = None; - for e in v { - min = Some(match min { - None => e.clone(), - Some(n) => e.clone().min(n) - }); +// Now consider the following piece of code. Like above, `n` will be a borrow of a part of `var`, +// and since we wrote `ref mut`, they will be mutable borrows. In other words, right after the match, `ptr` +// points to the number that's stored in `var`, where `var` is a `Number`. Remember that `_` means +// "we don't care". +fn work_on_variant(mut var: Variant, text: String) { + let mut ptr: &mut i32; + match var { + Variant::Number(ref mut n) => ptr = n, + Variant::Text(_) => return, } - min + /* var = Variant::Text(text); */ + *ptr = 1337; } -// Now, what's happening here? Why do we have to write `clone()`, and why did we not -// have to write that in our previous version? -// -// The answer is already hidden in the type of `vec_min`: `v` is just borrowed, but -// the Option that it returns is *owned*. We can't just return one -// of the elements of `v`, as that would mean that it is no longer in the vector! -// In our code, this comes up when we update the intermediate variable `min`, which -// also has type `Option`. If you replace `e.clone()` in the `None` arm -// with `*e`, Rust will complain "Cannot move out of borrowed content". That's because -// `e` is a `&BigInt`. Assigning `min` to `*e` works just like a function call: -// Ownership of the underlying data (in this case, the digits) is transferred from -// the vector to `min`. But that's not allowed, since we must retain the vector -// in its existing state. After cloning `e`, we own the copy that was created, -// and hence we can store it in `min`.
-// Of course, making such a full copy is expensive, so we'd like to avoid it. -// That's going to happen in the next part. -// -// But before we go there, I should answer the second question I brought up above: -// Why did our old `vec_min` work? We stored the minimal `i32` locally without -// cloning, and Rust did not complain. That's because there isn't really much -// of an "ownership" when it comes to types like `i32` or `bool`: If you move -// the value from one place to another, then both instance are independent -// and complete instances of their type. This is in stark contrast to types -// like `Vec`, where merely moving the value results in both the old -// and the new vector to point to the same underlying buffer. -// -// Rust calls types like `i32` that can be freely duplicated `Copy` types. -// `Copy` is another trait, and it is implemented for the basic types of -// the language. Remember how we defined the trait `Minimum` by writing -// `trait Minimum : Copy { ...`? This tells Rust that every type that -// implements `Minimum` must also implement `Copy`, and that's why Rust -// accepted our generic `vec_min` in part 02. -// -// Curiously, `Copy` is a trait that does not require any method to -// be implemented. Implementing `Copy` is merely a semantic statement, -// saying that the idea of ownership does not really apply to this type. -// Traits without methods are called *marker traits*. We will see some -// more examples of such traits later. +// Now, imagine what would happen if we were permitted to also mutate `var`. We could, for example, +// make it a `Text`. However, `ptr` still points to the old location! Hence `ptr` now points somewhere +// into the representation of a `String`. By changing `ptr`, we manipulate the string in completely +// unpredictable ways, and anything could happen if we were to use it again! (Technically, the first field +// of a `String` is a pointer to its character data, so by overwriting that pointer with an integer, +// we make it a completely invalid address. When the destructor of `var` runs, it would try to deallocate +// that address, and Rust would eat your laundry - or whatever.) // -// If you try to implement `Copy` for `BigInt`, you will notice that Rust -// does not let you do that. A type can only be `Copy` if all its elements -// are `Copy`, and that's not the case for `BigInt`. However, we can make -// `SomethingOrNothing` copy if `T` is `Copy`. -impl Copy for SomethingOrNothing{} -// Again, Rust can generate implementations of `Copy` automatically. If -// you add `#[derive(Copy,Clone)]` right before the definition of `SomethingOrNothing`, -// both `Copy` and `Clone` will automatically be implemented. +// I hope this example clarifies why Rust has to rule out mutation in the presence of aliasing *in general*, +// not just for the specific -// In closing this part, I'd like to give you another perspective on the -// move semantics (i.e., ownership passing) that Rust applies, and how -// `Copy` and `Clone` fit.
-// When Rust code is executed, passing a value (like `i32` or `Vec`) -// to a function will always result in a shallow copy being performed: Rust -// just copies the bytes representing that value, and considers itself done. -// That's just like the default copy constructor in C++. Rust, however, will -// consider this a destructive operation: After copying the bytes elsewhere, -// the original value must no longer be used. After all, the two could not -// share a pointer! If, however, you mark a type `Copy`, then Rust will *not* -// consider a move destructive, and just like in C++, the old and new value -// can happily coexist. Now, Rust does not allow to to overload the copy -// constructor. This means that passing a value around will always be a fast -// operation, no allocation or copying of large data of the heap will happen. -// In the situations where you would write a copy constructor in C++ (and hence -// incur a hidden cost on every copy of this type), you'd have the type *not* -// implement `Copy`, but only `Clone`. This makes the cost explicit. +// [index](main.html) | [previous](part04.html) | [next](part06.html) diff --git a/src/part06.rs b/src/part06.rs index 21046c6..26fa124 100644 --- a/src/part06.rs +++ b/src/part06.rs @@ -1,56 +1,150 @@ -// Rust-101, Part 06: Lifetimes, Testing -// ===================================== +// Rust-101, Part 06: Copy +// ======================= -use std::cmp; -use std::ops; -use std::fmt; use part05::BigInt; - -impl PartialEq for BigInt { - fn eq(&self, other: &BigInt) -> bool { +// With `BigInt` being about numbers, we should be able to write a version of `vec_min` +// that computes the minimum of a list of `BigInt`. We start by writing `min` for +// `BigInt`. Now our assumption of having no trailing zeros comes in handy! +impl BigInt { + fn min(self, other: Self) -> Self { + // Just to be sure, we first check that both operands actually satisfy our invariant. + // `debug_assert!` is a macro that checks that its argument (must be of type `bool`) + // is `true`, and panics otherwise. It gets removed in release builds, which you do with + // `cargo build --release`. debug_assert!(self.test_invariant() && other.test_invariant()); - self.data == other.data + // If the lengths of the two numbers differ, we already know which is larger. + if self.data.len() < other.data.len() { + self + } else if self.data.len() > other.data.len() { + other + } else { + // **Exercise 05.1**: Fill in this code. + panic!("Not yet implemented."); + } } } -fn call_eq() { - let b1 = BigInt::new(13); - let b2 = BigInt::new(37); - println!("b1 == b1: {} ; b1 == b2: {}; b1 != b2: {}", b1 == b1, b1 == b2, b1 != b2); +// Now we can write `vec_min`. In order to make it type-check, we have to write it as follows. +fn vec_min(v: &Vec) -> Option { + let mut min: Option = None; + for e in v { + min = Some(match min { + None => e.clone(), + Some(n) => e.clone().min(n) + }); + } + min } +// Now, what's happening here? Why do we have to write `clone()`, and why did we not +// have to write that in our previous version? +// +// The answer is already hidden in the type of `vec_min`: `v` is just borrowed, but +// the Option that it returns is *owned*. We can't just return one of the elements of `v`, +// as that would mean that it is no longer in the vector! In our code, this comes up when we update +// the intermediate variable `min`, which also has type `Option`. If you replace `e.clone()` +// in the `None` arm with `*e`, Rust will complain "Cannot move out of borrowed content". That's because +// `e` is a `&BigInt`. Assigning `min = Some(*e)` works just like a function call: Ownership of the +// underlying data is transferred from where `e` borrows from to `min`. But that's not allowed, since +// we just borrowed `e`, so we cannot empty it! We can, however, call `clone()` on it. Then we own +// the copy that was created, and hence we can store it in `min`.
+// Of course, making such a full copy is expensive, so we'd like to avoid it. We'll some to that soon. +// ## `Copy` types +// But before we go there, I should answer the second question I brought up above: Why did our old `vec_min` work? +// We stored the minimal `i32` locally without cloning, and Rust did not complain. That's because there isn't +// really much of an "ownership" when it comes to types like `i32` or `bool`: If you move the value from one +// place to another, then both instance are "complete". We also say the value has been *duplicated*. This is in +// stark contrast to types like `Vec`, where moving the value results in both the old and the new vector to +// point to the same underlying buffer. We don't have two vectors, there's no duplication. +// +// Rust calls types that can be freely duplicated `Copy` types. `Copy` is another trait, and it +// is implemented for types like `i32` and `bool`. Remember how we defined the trait `Minimum` by writing +// `trait Minimum : Copy { ...`? This tells Rust that every type that implements `Minimum` must also +// implement `Copy`, and that's why the compiler accepted our generic `vec_min` in part 02. +// `Copy` is the first *marker trait* that we encounter: It does not provide any methods, but +// makes a promise about the behavior of the type - in this case, being duplicable. -impl fmt::Debug for BigInt { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.data.fmt(f) - } -} +// If you try to implement `Copy` for `BigInt`, you will notice that Rust +// does not let you do that. A type can only be `Copy` if all its elements +// are `Copy`, and that's not the case for `BigInt`. However, we can make +// `SomethingOrNothing` copy if `T` is `Copy`. +use part02::{SomethingOrNothing,Something,Nothing}; +impl Copy for SomethingOrNothing{} +// Again, Rust can generate implementations of `Copy` automatically. If +// you add `#[derive(Copy,Clone)]` right before the definition of `SomethingOrNothing`, +// both `Copy` and `Clone` will automatically be implemented. +// ## An operational perspective +// Instead of looking at what happens "at the surface" (i.e., visible in Rust), one can also explain +// ownership passing and how `Copy` and `Clone` fit by looking at what happens on the machine.
+// When Rust code is executed, passing a value (like `i32` or `Vec`) to a function will always +// result in a shallow copy being performed: Rust just copies the bytes representing that value, and +// considers itself done. That's just like the default copy constructor in C++. Rust, however, will +// consider this a destructive operation: After copying the bytes elsewhere, the original value must +// no longer be used. After all, the two could not share a pointer! If, however, you mark a type `Copy`, +// then Rust will *not* consider a move destructive, and just like in C++, the old and new value +// can happily coexist. Now, Rust does not allow to to overload the copy constructor. This means that +// passing a value around will always be a fast operation, no allocation or any other kind of heap access +// will happen. In the situations where you would write a copy constructor in C++ (and hence +// incur a hidden cost on every copy of this type), you'd have the type *not* implement `Copy`, but only +// `Clone`. This makes the cost explicit. +// ## Lifetimes +// To fix the performance problems of `vec_min`, we need ti avoid using `clone()`. We'd like +// the return value to not be owned (remember that this was the source of our need for cloning), but *borrowed*. -impl BigInt { - pub fn inc(&mut self, mut by: u64) { - panic!("Not yet implemented."); +// This is demonstrated by the function `head` that borrows the first element of a vector if it is non-empty. +// The type of the function says that it will either return nothing, or it will return a borrowed `T`. +// We can then borrow the first element of `v` and use it to construct the return value. +fn head(v: &Vec) -> Option<&T> { + if v.len() > 0 { + Some(&v[0]) + } else { + None } } - -#[test] -fn test_inc() { - let mut b = BigInt::new(1337); - b.inc(1337); - assert!(b == BigInt::new(1337 + 1337)); - - b = BigInt::new(0); - assert_eq!(b, BigInt::from_vec(vec![0])); - b.inc(1 << 63); - assert_eq!(b, BigInt::from_vec(vec![1 << 63])); - b.inc(1 << 63); - assert_eq!(b, BigInt::from_vec(vec![0, 1])); - b.inc(1 << 63); - assert_eq!(b, BigInt::from_vec(vec![1 << 63, 1])); - b.inc(1 << 63); - assert_eq!(b, BigInt::from_vec(vec![0, 2])); +// Now, coming back to `head` - here, we are returning a pointer to the first element. But doesn't +// that mean that callers have to be careful? Imagine `head` would be a C++ function, and we would +// write the following code. +/* + int foo(std::vector v) { + int *first = head(v); + v.push_back(42); + return *first; + } +*/ +// This is very much like our very first motivating example for ownership, at the beginning of part 04. +// But this time, the bug is hidden behind the call to `head`. How does Rust solve this? If we translate +// the code above to Rust, it doesn't compile, so clearly we are good - but how and why? +// (Notice that have to explicitly assert using `unwrap` that `first` is not `None`, whereas the C++ code +// above would silently dereference a `NULL`-pointer. But that's another point.) +fn rust_foo(mut v: Vec) -> i32 { + let first: Option<&i32> = head(&v); + /* v.push(42); */ + *first.unwrap() } +// To give the answer to this question, we have to talk about the *lifetime* of a borrow. The point is, saying that +// you borrowed your friend a `Vec`, or a book, is not good enough, unless you also agree on *how long* +// your friend can borrow. After all, you need to know when you can rely on owning your data (or book) again. +// +// Every borrow in Rust has an associated lifetime. The full type of `head` reads as follows: +// `fn<'a, T>(&'a Vec) -> Option<&'a T>`. Here, `'a` is a *lifetime variable*, which represents how long the vector has +// been borrowed. The function type expresses that argument and return value have *the same lifetime*. +// +// When analyzing the code of `rust_foo`, Rust has to assign a lifetime to `first`. It will choose the scope +// where `first` is valid, which is the entire rest of the function. Because `head` ties the lifetime of its +// argument and return value together, this means that `&v` also has to borrow `v` for the entire duration of +// the function. So when we try to borrow `v` mutable for `push`, Rust complains that the two borrows (the one +// for `head`, and the one for `push`) overlap. Lucky us! Rust caught our mistake and made sure we don't crash the program. +// +// So, to sum this up: Lifetimes enable Rust to reason about *how long* a pointer has been borrowed. We can thus +// safely write functions like `head`, that return pointers into data they got as argument, and make sure they +// are used correctly, *while looking only at the function type*. At no point in our analysis of `rust_foo` did +// we have to look *into* `head`. That's, of course, crucial if we want to separate library code from application code. +// Most of the time, we don't have to explicitly add lifetimes to function types. This is thanks to *lifetimes elision*, +// where Rust will automatically insert lifetimes we did not specify, following some [simple, well-documented rules](http://doc.rust-lang.org/stable/book/lifetimes.html#lifetime-elision). + +// [index](main.html) | [previous](part05.html) | [next](main.html) diff --git a/src/part07.rs b/src/part07.rs index e9715e0..92da555 100644 --- a/src/part07.rs +++ b/src/part07.rs @@ -1,30 +1,54 @@ -use std::cmp; -use std::ops; use part05::BigInt; -// Add with carry, returning the sum and the carry -fn overflowing_add(a: u64, b: u64, carry: bool) -> (u64, bool) { - let sum = u64::wrapping_add(a, b); - if sum >= a { - panic!("First addition did not overflow. Not implemented."); - } else { - panic!("First addition *did* overflow. Not implemented."); - } +pub trait Minimum { + /// Return the smaller of the two + fn min<'a>(&'a self, other: &'a Self) -> &'a Self; } -/*#[test]*/ -fn test_overflowing_add() { - assert_eq!(overflowing_add(10, 100, false), (110, false)); - assert_eq!(overflowing_add(10, 100, true), (111, false)); - assert_eq!(overflowing_add(1 << 63, 1 << 63, false), (0, true)); - assert_eq!(overflowing_add(1 << 63, 1 << 63, true), (1, true)); - assert_eq!(overflowing_add(1 << 63, (1 << 63) -1 , true), (0, true)); +/// Return a pointer to the minimal value of `v`. +pub fn vec_min(v: &Vec) -> Option<&T> { + let mut min = None; + for e in v { + min = Some(match min { + None => e, + Some(n) => e.min(n) + }); + } + min } +// Notice that `Option<&T>` is technically (leaving the borrowing story aside) a pointer to a `T`, +// that could optionally be invalid. In other words, it's just like a pointer in C(++) or Java +// that can be `NULL`! However, thanks to `Option` being an `enum`, we cannot forget to check +// the pointer for validity, avoiding the safety issues of C(++). At the same time, when we +// have a borrow like `v` above that's not an `Option`, we *know* that is has to be a valid +// pointer, so we don't even need to do a `NULL`-check.
+// Also, if you are worried about wasting space, notice that Rust knows that `&T` can never be +// `NULL`, and hence optimizes `Option<&T>` to be no larger than `&T`. The `None` case is represented +// as `NULL`. This is another great example of a zero-cost abstraction: `Option<&T>` is exactly like +// a pointer in C(++), if you look at what happens during execution - but it's much safer to use. -impl ops::Add for BigInt { - type Output = BigInt; - fn add(self, rhs: BigInt) -> Self::Output { - let mut result_vec:Vec = Vec::with_capacity(cmp::max(self.data.len(), rhs.data.len())); - panic!("Not yet implemented."); +impl Minimum for BigInt { + fn min<'a>(&'a self, other: &'a Self) -> &'a Self { + debug_assert!(self.test_invariant() && other.test_invariant()); + if self.data.len() < other.data.len() { + self + } else if self.data.len() > other.data.len() { + other + } else { + // compare back-to-front, i.e., most significant digit first + let mut idx = self.data.len()-1; + while idx > 0 { + if self.data[idx] < other.data[idx] { + return self; + } else if self.data[idx] > other.data[idx] { + return other; + } + else { + idx = idx-1; + } + } + // the two are equal + return self; + } } } diff --git a/src/part08.rs b/src/part08.rs new file mode 100644 index 0000000..558bd62 --- /dev/null +++ b/src/part08.rs @@ -0,0 +1,79 @@ +use std::cmp; +use std::ops; +use std::fmt; +use part05::BigInt; + +impl PartialEq for BigInt { + fn eq(&self, other: &BigInt) -> bool { + debug_assert!(self.test_invariant() && other.test_invariant()); + self.data == other.data + } +} + +fn call_eq() { + let b1 = BigInt::new(13); + let b2 = BigInt::new(37); + println!("b1 == b1: {} ; b1 == b2: {}; b1 != b2: {}", b1 == b1, b1 == b2, b1 != b2); +} + + +impl fmt::Debug for BigInt { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.data.fmt(f) + } +} + + + +impl BigInt { + pub fn inc(&mut self, mut by: u64) { + panic!("Not yet implemented."); + } +} + + +#[test] +fn test_inc() { + let mut b = BigInt::new(1337); + b.inc(1337); + assert!(b == BigInt::new(1337 + 1337)); + + b = BigInt::new(0); + assert_eq!(b, BigInt::from_vec(vec![0])); + b.inc(1 << 63); + assert_eq!(b, BigInt::from_vec(vec![1 << 63])); + b.inc(1 << 63); + assert_eq!(b, BigInt::from_vec(vec![0, 1])); + b.inc(1 << 63); + assert_eq!(b, BigInt::from_vec(vec![1 << 63, 1])); + b.inc(1 << 63); + assert_eq!(b, BigInt::from_vec(vec![0, 2])); +} + + +// Add with carry, returning the sum and the carry +fn overflowing_add(a: u64, b: u64, carry: bool) -> (u64, bool) { + let sum = u64::wrapping_add(a, b); + if sum >= a { + panic!("First addition did not overflow. Not implemented."); + } else { + panic!("First addition *did* overflow. Not implemented."); + } +} + +/*#[test]*/ +fn test_overflowing_add() { + assert_eq!(overflowing_add(10, 100, false), (110, false)); + assert_eq!(overflowing_add(10, 100, true), (111, false)); + assert_eq!(overflowing_add(1 << 63, 1 << 63, false), (0, true)); + assert_eq!(overflowing_add(1 << 63, 1 << 63, true), (1, true)); + assert_eq!(overflowing_add(1 << 63, (1 << 63) -1 , true), (0, true)); +} + +impl ops::Add for BigInt { + type Output = BigInt; + fn add(self, rhs: BigInt) -> Self::Output { + let mut result_vec:Vec = Vec::with_capacity(cmp::max(self.data.len(), rhs.data.len())); + panic!("Not yet implemented."); + } +}