From 5f6e02d64e3789115ea4327a045b8ad3c39b1808 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 13 Jul 2015 15:57:37 +0200 Subject: [PATCH 01/16] implement rgrep, and write part 12 (draft) about it --- solutions/Cargo.lock | 59 +++++++++++++ solutions/Cargo.toml | 3 + solutions/src/lib.rs | 2 - solutions/src/main.rs | 9 ++ solutions/src/rgrep.rs | 113 +++++++++++++++++++++++++ src/main.rs | 3 +- src/part12.rs | 179 ++++++++++++++++++++++++++++++++++++++++ workspace/src/main.rs | 1 + workspace/src/part12.rs | 123 +++++++++++++++++++++++++++ 9 files changed, 489 insertions(+), 3 deletions(-) delete mode 100644 solutions/src/lib.rs create mode 100644 solutions/src/main.rs create mode 100644 solutions/src/rgrep.rs create mode 100644 src/part12.rs create mode 100644 workspace/src/part12.rs diff --git a/solutions/Cargo.lock b/solutions/Cargo.lock index ffb21e6..9aada13 100644 --- a/solutions/Cargo.lock +++ b/solutions/Cargo.lock @@ -1,4 +1,63 @@ [root] name = "solutions" version = "0.1.0" +dependencies = [ + "docopt 0.6.67 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "docopt" +version = "0.6.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "regex 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.1.8 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc-serialize" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "strsim" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/solutions/Cargo.toml b/solutions/Cargo.toml index 8aebfa9..d48a4dd 100644 --- a/solutions/Cargo.toml +++ b/solutions/Cargo.toml @@ -2,3 +2,6 @@ name = "solutions" version = "0.1.0" authors = ["Ralf Jung "] + +[dependencies] +docopt = "*" diff --git a/solutions/src/lib.rs b/solutions/src/lib.rs deleted file mode 100644 index cbe9705..0000000 --- a/solutions/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod bigint; -pub mod vec; diff --git a/solutions/src/main.rs b/solutions/src/main.rs new file mode 100644 index 0000000..8afd81c --- /dev/null +++ b/solutions/src/main.rs @@ -0,0 +1,9 @@ +extern crate docopt; + +pub mod bigint; +pub mod vec; +pub mod rgrep; + +pub fn main() { + rgrep::main(); +} \ No newline at end of file diff --git a/solutions/src/rgrep.rs b/solutions/src/rgrep.rs new file mode 100644 index 0000000..a3b74cc --- /dev/null +++ b/solutions/src/rgrep.rs @@ -0,0 +1,113 @@ +use std::io::prelude::*; +use std::{io, fs, thread, process}; +use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; +use std::sync::Arc; + +#[derive(Clone,Copy)] +enum OutputMode { + Print, + SortAndPrint, + Count, +} +use self::OutputMode::*; + +struct Options { + files: Vec, + 
pattern: String, + output_mode: OutputMode, +} + +struct Line { + data: String, + file: usize, + line: usize, +} + +fn read_files(options: Arc, out_channel: SyncSender) { + for (fileidx, file) in options.files.iter().enumerate() { + let file = fs::File::open(file).unwrap(); + let file = io::BufReader::new(file); + for (lineidx, line) in file.lines().enumerate() { + let line = Line { data: line.unwrap(), file: fileidx, line: lineidx }; + out_channel.send(line).unwrap(); + } + } +} + +fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { + for line in in_channel.iter() { + if line.data.contains(&options.pattern) { + out_channel.send(line).unwrap(); + } + } +} + +fn output_lines(options: Arc, in_channel: Receiver) { + match options.output_mode { + Print => { + for line in in_channel.iter() { + println!("{}:{}: {}", options.files[line.file], line.line, line.data); + } + }, + Count => { + let count = in_channel.iter().count(); + println!("{} hits for {}.", count, options.pattern); + }, + SortAndPrint => { + let _data: Vec = in_channel.iter().collect(); + unimplemented!() + } + } +} + +static USAGE: &'static str = " +Usage: rgrep [-c] [-s] ... + +Options: + -c, --count Count number of matching lines (rather than printing them). + -s, --sort Sort the lines before printing. +"; + +fn get_options() -> Options { + use docopt::Docopt; + + // Parse argv and exit the program with an error message if it fails. + let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); + let count = args.get_bool("-c"); + let sort = args.get_bool("-s"); + let pattern = args.get_str(""); + let files = args.get_vec(""); + if count && sort { + println!("Setting both '-c' and '-s' at the same time does not make any sense."); + process::exit(1); + } + + // We need to make the strings owned to construct the `Options` instance. 
+ Options { + files: files.iter().map(|file| file.to_string()).collect(), + pattern: pattern.to_string(), + output_mode: if count { Count } else if sort { SortAndPrint } else { Print }, + } +} + +fn run(options: Options) { + let options = Arc::new(options); + + // Set up the chain of threads. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + let (line_sender, line_receiver) = sync_channel(16); + let (filtered_sender, filtered_receiver) = sync_channel(16); + + let options1 = options.clone(); + let handle1 = thread::spawn(move || read_files(options1, line_sender)); + let options2 = options.clone(); + let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + let options3 = options.clone(); + let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); + handle1.join().unwrap(); + handle2.join().unwrap(); + handle3.join().unwrap(); +} + +pub fn main() { + run(get_options()); +} diff --git a/src/main.rs b/src/main.rs index 4fe4215..8526698 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,7 +29,7 @@ // first requirement rules out a garbage collector: Rust can run "bare metal". // In fact, Rust rules out more classes of bugs than languages that achieve safety // with a GC: Besides dangling pointers and double-free, Rust also prevents issues -// such as iterator invalidation and race conditions. +// such as iterator invalidation and data races. // // // Getting started @@ -93,6 +93,7 @@ mod part08; mod part09; mod part10; mod part11; +mod part12; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. 
diff --git a/src/part12.rs b/src/part12.rs new file mode 100644 index 0000000..edcb9e0 --- /dev/null +++ b/src/part12.rs @@ -0,0 +1,179 @@ +// Rust-101, Part 12: Concurrency (WIP) +// ================= + +use std::io::prelude::*; +use std::{io, fs, thread}; +use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; +use std::sync::Arc; + +//@ This part is introducing the concurrency features of Rust. We are going to write our own small version of "grep", +//@ called *rgrep*, and it is going to make use of multiple cores: One thread reads the input files, one thread does +//@ the actual matching, and one thread writes the output. + +// Before we come to the actual code, we define a data-structure `Options` to store all the information we need +// to complete the job: Which files to work on, which pattern to look for, and how to output.
+// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +#[derive(Clone,Copy)] +enum OutputMode { + Print, + SortAndPrint, + Count, +} +use self::OutputMode::*; + +struct Options { + files: Vec, + pattern: String, + output_mode: OutputMode, +} + +//@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. +//@ To get the data from one thread to the next, we will use *message passing*: We will establish communication +//@ channels between the threads, with one thread *sending* data, and the other one receiving it. `SyncSender` +//@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here +//@ means that the `send` operation could block, waiting for the other side to make progress. We don't want to +//@ end up with the entire files being stored in the buffer of the channels, and the output not being fast enough +//@ to keep up with the speed of input. +//@ +//@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would +//@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between +//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. +//@ You can also think of this as saying that *all* threads own the `Options` "a bit" - and since there could be other +//@ owners, `Arc` (just like `Rc`) only permits read-only access to its content. That's good enough for the options, though. + +// The first function reads the files, and sends every line over the `out_channel`. +fn read_files(options: Arc, out_channel: SyncSender) { + for file in options.files.iter() { + // First, we open the file, ignoring any errors. + let file = fs::File::open(file).unwrap(); + // Then we obtain a `BufReader` for it, which provides the `lines` function.
+ let file = io::BufReader::new(file); + for line in file.lines() { + let line = line.unwrap(); + // Now we send the line over the channel, ignoring the possibility of `send` failing. + out_channel.send(line).unwrap(); + } + } + // When we drop the `out_channel`, it will be closed, which the other end can notice. +} + +// The second function filters the lines it receives through `in_channel` with the pattern, and sends +// matches via `out_channel`. +fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { + // We can simply iterate over the channel, which will stop when the channel is closed. + for line in in_channel.iter() { + // `contains` works on lots of types of patterns, but in particular, we can use it to test whether + // one string is contained in another. + if line.contains(&options.pattern) { + out_channel.send(line).unwrap(); /*@*/ + } + } +} + +// The third function performs the output operations, receiving the relevant lines on its `in_channel`. +fn output_lines(options: Arc, in_channel: Receiver) { + match options.output_mode { + Print => { + // Here, we just print every line we see. + for line in in_channel.iter() { + println!("{}", line); /*@*/ + } + }, + Count => { + // We are supposed to count the number of matching lines. There's a convenient iterator adapter that + // we can use for this job. + let count = in_channel.iter().count(); /*@*/ + println!("{} hits for {}.", count, options.pattern); /*@*/ + }, + SortAndPrint => { + // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... + let data: Vec = in_channel.iter().collect(); + // ...and implement the actual sorting later. + unimplemented!() + } + } +} + +// With the operations of the three threads defined, we can now implement a function that performs grepping according +// to some given options. +fn run(options: Options) { + // We move the `options` into an `Arc`, as that's what the thread workers expect. 
+ let options = Arc::new(options); + + // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + let (line_sender, line_receiver) = sync_channel(16); + let (filtered_sender, filtered_receiver) = sync_channel(16); + + // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. + //@ The `move` keyword again tells Rust that we want ownership of captured variables to be moved into the + //@ closure. This means we need to do the `clone` *first*, otherwise we would lose our `options` to the + //@ new thread! + let options1 = options.clone(); + let handle1 = thread::spawn(move || read_files(options1, line_sender)); + + // Same with the filter thread. + let options2 = options.clone(); + let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + + // And the output thread. + let options3 = options.clone(); + let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); + + // Finally, wait until all three threads did their job. + handle1.join().unwrap(); + handle2.join().unwrap(); + handle3.join().unwrap(); +} + +// Now we have all the pieces together for testing our `rgrep` with some hard-coded options. +//@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. +pub fn main() { + let options = Options { + files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + pattern: "let".to_string(), + output_mode: Print + }; + run(options); +} + +// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file +// and the number of the line in the file. You will have to change the type of the channels from `String` to something +// that records this extra information. + +//@ ## Ownership, Borrowing, and Concurrency +//@ The little demo above showed that concurrency in Rust has a fairly simple API. 
However, considering Rust has closures, +//@ that should not be entirely surprising. However, as I mentioned in the beginning, Rust ensures that well-typed programs +//@ do not have data races. How can that be? A data race is typically defined as having two concurrent, unsynchronized +//@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in +//@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded +//@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against +//@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to +//@ the string will actually be moved around (just like when a `String` is passed to a function with full ownership). However, +//@ `read_files` has to *give up* ownership of the string to perform `send`, so it is impossible for an outstanding borrow to +//@ still be around. After it sent the string to the other side, `read_files` has no way to race on the data with someone else. +//@ +//@ However, there is more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make +//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning +//@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use +//@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` +//@ to be valid for lifetime `'static`, as you can see in [its documentation](http://doc.rust-lang.org/stable/std/thread/fn.spawn.html). +//@ This avoids another kind of data race, where the thread's access races with the callee deallocating its stack frame. + +//@ ## Send +//@ However, the story goes further.
I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations +//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` above, or the reference count may become invalid. +//@ And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something is wrong. +//@ That's great, of course, but how did it do that? +//@ +//@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in +//@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, +//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send` can be +//@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, +//@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! +//@ +//@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This means that *every type* implements +//@ `Send`, unless it opts out. Opting out is viral: If your type contains a type that opted out, then you don't have `Send`, either. +//@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every +//@ captured variable *is* `Send`, then so is the entire environment, and you are good. + +//@ [index](main.html) | [previous](part11.html) | [next](main.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index 8531db6..98e8e8d 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -13,6 +13,7 @@ mod part08; mod part09; mod part10; mod part11; +mod part12; // This decides which part is actually run.
fn main() { diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs new file mode 100644 index 0000000..1d75bfd --- /dev/null +++ b/workspace/src/part12.rs @@ -0,0 +1,123 @@ +// Rust-101, Part 12: Concurrency (WIP) +// ================= + +use std::io::prelude::*; +use std::{io, fs, thread}; +use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; +use std::sync::Arc; + + +// Before we come to the actual code, we define a data-structure `Options` to store all the information we need +// to complete the job: Which files to work on, which pattern to look for, and how to output.
+// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +#[derive(Clone,Copy)] +enum OutputMode { + Print, + SortAndPrint, + Count, +} +use self::OutputMode::*; + +struct Options { + files: Vec, + pattern: String, + output_mode: OutputMode, +} + + +// The first function reads the files, and sends every line over the `out_channel`. +fn read_files(options: Arc, out_channel: SyncSender) { + for file in options.files.iter() { + // First, we open the file, ignoring any errors. + let file = fs::File::open(file).unwrap(); + // Then we obtain a `BufReader` for it, which provides the `lines` function. + let file = io::BufReader::new(file); + for line in file.lines() { + let line = line.unwrap(); + // Now we send the line over the channel, ignoring the possibility of `send` failing. + out_channel.send(line).unwrap(); + } + } + // When we drop the `out_channel`, it will be closed, which the other end can notice. +} + +// The second function filters the lines it receives through `in_channel` with the pattern, and sends +// matches via `out_channel`. +fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { + // We can simply iterate over the channel, which will stop when the channel is closed. + for line in in_channel.iter() { + // `contains` works on lots of types of patterns, but in particular, we can use it to test whether + // one string is contained in another. + if line.contains(&options.pattern) { + unimplemented!() + } + } +} + +// The third function performs the output operations, receiving the relevant lines on its `in_channel`. +fn output_lines(options: Arc, in_channel: Receiver) { + match options.output_mode { + Print => { + // Here, we just print every line we see. + for line in in_channel.iter() { + unimplemented!() + } + }, + Count => { + // We are supposed to count the number of matching lines. There's a convenient iterator adapter that + // we can use for this job.
+ unimplemented!() + }, + SortAndPrint => { + // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... + let data: Vec = in_channel.iter().collect(); + // ...and implement the actual sorting later. + unimplemented!() + } + } +} + +// With the operations of the three threads defined, we can now implement a function that performs grepping according +// to some given options. +fn run(options: Options) { + // We move the `options` into an `Arc`, as that's what the thread workers expect. + let options = Arc::new(options); + + // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + let (line_sender, line_receiver) = sync_channel(16); + let (filtered_sender, filtered_receiver) = sync_channel(16); + + // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. + let options1 = options.clone(); + let handle1 = thread::spawn(move || read_files(options1, line_sender)); + + // Same with the filter thread. + let options2 = options.clone(); + let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + + // And the output thread. + let options3 = options.clone(); + let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); + + // Finally, wait until all three threads did their job. + handle1.join().unwrap(); + handle2.join().unwrap(); + handle3.join().unwrap(); +} + +// Now we have all the pieces together for testing our `rgrep` with some hard-coded options. +pub fn main() { + let options = Options { + files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + pattern: "let".to_string(), + output_mode: Print + }; + run(options); +} + +// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file +// and the number of the line in the file. 
You will have to change the type of the channels from `String` to something +// that records this extra information. + + + -- 2.30.2 From bae9e47884fdc3fc1a81fb4844572a832fcfb2ce Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 13 Jul 2015 17:19:57 +0200 Subject: [PATCH 02/16] part 13 draft: sorting, external dependencies --- Cargo.lock | 59 ++++++++++++++++ Cargo.toml | 3 + solutions/src/rgrep.rs | 47 ++++++++++++- src/main.rs | 6 +- src/part12.rs | 18 ++--- src/part13.rs | 145 ++++++++++++++++++++++++++++++++++++++++ workspace/Cargo.toml | 3 + workspace/src/main.rs | 3 +- workspace/src/part12.rs | 18 ++--- workspace/src/part13.rs | 96 ++++++++++++++++++++++++++ 10 files changed, 374 insertions(+), 24 deletions(-) create mode 100644 src/part13.rs create mode 100644 workspace/src/part13.rs diff --git a/Cargo.lock b/Cargo.lock index b44a287..67ade17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,4 +1,63 @@ [root] name = "rust-101" version = "0.1.0" +dependencies = [ + "docopt 0.6.67 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "docopt" +version = "0.6.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "regex 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = 
"regex" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rustc-serialize" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "strsim" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index 10572b4..e590353 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,3 +2,6 @@ name = "rust-101" version = "0.1.0" authors = ["Ralf Jung "] + +[dependencies] +docopt = "*" diff --git a/solutions/src/rgrep.rs b/solutions/src/rgrep.rs index a3b74cc..316e6f0 100644 --- a/solutions/src/rgrep.rs +++ b/solutions/src/rgrep.rs @@ -1,5 +1,5 @@ use std::io::prelude::*; -use std::{io, fs, thread, process}; +use std::{io, fs, thread, process, cmp}; use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; use std::sync::Arc; @@ -23,6 +23,17 @@ struct Line { line: usize, } +impl PartialEq for Line { + fn eq(&self, other: &Line) -> bool { + self.data.eq(&other.data) + } +} +impl PartialOrd for Line { + fn partial_cmp(&self, other: &Line) -> Option { + self.data.partial_cmp(&other.data) + } +} + fn read_files(options: Arc, out_channel: SyncSender) { for (fileidx, file) in options.files.iter().enumerate() { let file = fs::File::open(file).unwrap(); @@ -42,6 +53,33 @@ fn filter_lines(options: Arc, in_channel: Receiver, out_channel: } } +fn sort(data: &mut [T]) { + if data.len() < 2 { return; } + + let mut lpos = 1; + let mut rpos = data.len(); + // Invariant: pivot is data[0]; (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos + loop 
{ + while lpos < rpos && data[lpos] <= data[0] { + lpos += 1; + } + while rpos > lpos && data[rpos-1] >= data[0] { + rpos -= 1; + } + if rpos == lpos { + break; + } + + data.swap(lpos, rpos-1); + } + + data.swap(0, lpos-1); // put pivot in the right place + + let (part1, part2) = data.split_at_mut(lpos); + sort(&mut part1[..lpos-1]); + sort(part2); +} + fn output_lines(options: Arc, in_channel: Receiver) { match options.output_mode { Print => { @@ -54,8 +92,11 @@ fn output_lines(options: Arc, in_channel: Receiver) { println!("{} hits for {}.", count, options.pattern); }, SortAndPrint => { - let _data: Vec = in_channel.iter().collect(); - unimplemented!() + let mut data: Vec = in_channel.iter().collect(); + sort(&mut data[..]); + for line in data.iter() { + println!("{}:{}: {}", options.files[line.file], line.line, line.data); + } } } } diff --git a/src/main.rs b/src/main.rs index 8526698..0290eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -80,7 +80,8 @@ // * [Part 10: Closures](part10.html) // * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) // * (to be continued) -#![allow(dead_code, unused_imports, unused_variables, unused_mut)] +#![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] +/* extern crate docopt; */ mod part00; mod part01; mod part02; @@ -94,14 +95,15 @@ mod part09; mod part10; mod part11; mod part12; +mod part13; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. - fn main() { part00::main(); } + // Additional material // ------------------- // diff --git a/src/part12.rs b/src/part12.rs index edcb9e0..477a3ae 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -14,17 +14,17 @@ use std::sync::Arc; // to complete the job: Which files to work on, which pattern to look for, and how to output.
// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] -enum OutputMode { +pub enum OutputMode { Print, SortAndPrint, Count, } use self::OutputMode::*; -struct Options { - files: Vec, - pattern: String, - output_mode: OutputMode, +pub struct Options { + pub files: Vec, + pub pattern: String, + pub output_mode: OutputMode, } //@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. @@ -87,7 +87,7 @@ fn output_lines(options: Arc, in_channel: Receiver) { }, SortAndPrint => { // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... - let data: Vec = in_channel.iter().collect(); + let mut data: Vec = in_channel.iter().collect(); // ...and implement the actual sorting later. unimplemented!() } @@ -96,7 +96,7 @@ fn output_lines(options: Arc, in_channel: Receiver) { // With the operations of the three threads defined, we can now implement a function that performs grepping according // to some given options. -fn run(options: Options) { +pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); @@ -125,7 +125,7 @@ fn run(options: Options) { handle3.join().unwrap(); } -// Now we have all the pieces together for testing our `rgrep` with some hard-coded options. +// Now we have all the pieces together for testing our rgrep with some hard-coded options. //@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. pub fn main() { let options = Options { @@ -136,7 +136,7 @@ pub fn main() { run(options); } -// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file // and the number of the line in the file. 
You will have to change the type of the channels from `String` to something // that records this extra information. diff --git a/src/part13.rs b/src/part13.rs new file mode 100644 index 0000000..0121079 --- /dev/null +++ b/src/part13.rs @@ -0,0 +1,145 @@ +// Rust-101, Part 13: Slices, Arrays, External Dependencies +// ================= + +//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options +//@ as argument to the program, rather than hard-coding them. Let's start with sorting. + +// ## Slices +//@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to +//@ pass it a `Vec<T>`. Now, sorting does not actually consume the argument, so we could make that a `&mut Vec<T>`. +//@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, +//@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. +//@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the +//@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. +//@ +//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous +//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for +//@ trait objects, this means we can only operate on pointers to that type, and these pointers will contain the missing +//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split! +//@ Our function can thus take a borrowed slice, and promise to sort all elements in there.
+pub fn sort(data: &mut [T]) { + if data.len() < 2 { return; } + + // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, + // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. + let mut lpos = 1; + let mut rpos = data.len(); + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + loop { + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + unimplemented!() + } + + // Once our cursors met, we need to put the pivot in the right place. + data.swap(0, lpos-1); + + // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: + //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one at the beginning and + //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. + //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed. + let (part1, part2) = data.split_at_mut(lpos); + //@ The index operation can not only be used to address certain elements, it can also be used for "slicing": Giving a range + //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from + //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. + sort(&mut part1[..lpos-1]); /*@*/ + sort(part2); /*@*/ +} + +// **Exercise 13.2**: Since `String` implements `PartialOrd`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// only, not by filename or line number! + +// Now, we can sort, e.g., a vector of numbers.
+fn sort_nums(data: &mut Vec) { + //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which means we want to slice the entire vector. + //@ It is then passed to the `sort` function, which doesn't even know that it is working on data inside a vector. + sort(&mut data[..]); +} + +// ## Arrays +//@ An *array* in Rust is given by the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; 10]` is an array of 10 floating-point +//@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also +//@ borrow them as slices, e.g., to sort them. +fn sort_array() { + let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut data); +} + +// ## External Dependencies +//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on +//@ [`std::env::args`](http://doc.rust-lang.org/beta/std/env/fn.args.html) to gain access to those arguments, and this would become +//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from +//@ other people's work in your program. +//@ +//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. Someone must have written a library +//@ for the job, right? Indeed, someone has. Rust has a central repository of published libraries, called [crates.io](https://crates.io/). +//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): Everybody can upload their code, +//@ and there's tooling for importing that code into your project. This tooling is provided by `cargo`, the tool we are already using to +//@ build this tutorial. (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the documentation](http://doc.crates.io/crates-io.html) for more details.)
+//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which creates a parser for command-line +//@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file. + +//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace +//@ now, and enable the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, +//@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on. +//@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local +//@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). + +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, +// you still have to get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. +#[cfg(feature = "disabled")] +pub mod rgrep { + // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. + use docopt::Docopt; + use part12::{run, Options, OutputMode}; + use std::process; + + // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + static USAGE: &'static str = " +Usage: rgrep [-c] [-s] ... + +Options: + -c, --count Count number of matching lines (rather than printing them). + -s, --sort Sort the lines before printing. +"; + + // This function extracts the rgrep options from the command-line arguments.
+ fn get_options() -> Options { + // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); + // Now we can get all the values out. + let count = args.get_bool("-c"); + let sort = args.get_bool("-s"); + let pattern = args.get_str(""); + let files = args.get_vec(""); + if count && sort { + println!("Setting both '-c' and '-s' at the same time does not make any sense."); + process::exit(1); + } + + // We need to make the strings owned to construct the `Options` instance. + //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 encoded string, that is, a bunch of + //@ bytes in memory (`[u8]`) that are valid according to UTF-8. `str` is unsized. `&str` is a sliced string, and stores the address of the character data, and + //@ their length. String literals like "this one" are of type `&'static str`: They point right to the constant section of the binary, so you cannot claim you + //@ own them. However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling `to_string` will copy the string data + //@ into an owned buffer on the heap, and thus convert it to `String`. + Options { + files: files.iter().map(|file| file.to_string()).collect(), + pattern: pattern.to_string(), + output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + } + } + + // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. + // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action!
+ pub fn main() { + run(get_options()); + } +} + +// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch +// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. + +//@ [index](main.html) | [previous](part12.html) | [next](main.html) diff --git a/workspace/Cargo.toml b/workspace/Cargo.toml index a038197..81228f7 100644 --- a/workspace/Cargo.toml +++ b/workspace/Cargo.toml @@ -1,3 +1,6 @@ [package] name = "rust-101-workspace" version = "0.0.0" + +#[dependencies] +#docopt = "*" diff --git a/workspace/src/main.rs b/workspace/src/main.rs index 98e8e8d..7e7c200 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -1,4 +1,4 @@ -#![allow(dead_code, unused_imports, unused_variables, unused_mut)] +#![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] // Only the files imported here will be compiled. mod part00; @@ -14,6 +14,7 @@ mod part09; mod part10; mod part11; mod part12; +mod part13; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index 1d75bfd..84d47ec 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -11,17 +11,17 @@ use std::sync::Arc; // to complete the job: Which files to work on, which pattern to look for, and how to output.
// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] -enum OutputMode { +pub enum OutputMode { Print, SortAndPrint, Count, } use self::OutputMode::*; -struct Options { - files: Vec, - pattern: String, - output_mode: OutputMode, +pub struct Options { + pub files: Vec, + pub pattern: String, + pub output_mode: OutputMode, } @@ -70,7 +70,7 @@ fn output_lines(options: Arc, in_channel: Receiver) { }, SortAndPrint => { // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... - let data: Vec = in_channel.iter().collect(); + let mut data: Vec = in_channel.iter().collect(); // ...and implement the actual sorting later. unimplemented!() } @@ -79,7 +79,7 @@ fn output_lines(options: Arc, in_channel: Receiver) { // With the operations of the three threads defined, we can now implement a function that performs grepping according // to some given options. -fn run(options: Options) { +pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); @@ -105,7 +105,7 @@ fn run(options: Options) { handle3.join().unwrap(); } -// Now we have all the pieces together for testing our `rgrep` with some hard-coded options. +// Now we have all the pieces together for testing our rgrep with some hard-coded options. pub fn main() { let options = Options { files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], @@ -115,7 +115,7 @@ pub fn main() { run(options); } -// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file // and the number of the line in the file. 
You will have to change the type of the channels from `String` to something // that records this extra information. diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs new file mode 100644 index 0000000..3ef7785 --- /dev/null +++ b/workspace/src/part13.rs @@ -0,0 +1,96 @@ +// Rust-101, Part 13: Slices, Arrays, External Dependencies +// ================= + + +// ## Slices +pub fn sort(data: &mut [T]) { + if data.len() < 2 { return; } + + // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, + // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. + let mut lpos = 1; + let mut rpos = data.len(); + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + loop { + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + unimplemented!() + } + + // Once our cursors met, we need to put the pivot in the right place. + data.swap(0, lpos-1); + + // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: + let (part1, part2) = data.split_at_mut(lpos); + unimplemented!() +} + +// **Exercise 13.2**: Since `String` implements `PartialOrd`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// only, not by filename or line number! + +// Now, we can sort, e.g., a vector of numbers. +fn sort_nums(data: &mut Vec) { + sort(&mut data[..]); +} + +// ## Arrays +fn sort_array() { + let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut data); +} + +// ## External Dependencies + + +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked.
However, before enabling it, +// you still have to get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. +#[cfg(feature = "disabled")] +pub mod rgrep { + // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. + use docopt::Docopt; + use part12::{run, Options, OutputMode}; + use std::process; + + // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + static USAGE: &'static str = " +Usage: rgrep [-c] [-s] ... + +Options: + -c, --count Count number of matching lines (rather than printing them). + -s, --sort Sort the lines before printing. +"; + + // This function extracts the rgrep options from the command-line arguments. + fn get_options() -> Options { + // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); + // Now we can get all the values out. + let count = args.get_bool("-c"); + let sort = args.get_bool("-s"); + let pattern = args.get_str(""); + let files = args.get_vec(""); + if count && sort { + println!("Setting both '-c' and '-s' at the same time does not make any sense."); + process::exit(1); + } + + // We need to make the strings owned to construct the `Options` instance. + Options { + files: files.iter().map(|file| file.to_string()).collect(), + pattern: pattern.to_string(), + output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + } + } + + // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`.
Edit `main.rs` to call this function. + // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! + pub fn main() { + run(get_options()); + } +} + +// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch +// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. + -- 2.30.2 From 562558d25054c5be82f11acad0fbe53699de5b1c Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 Jul 2015 12:42:31 +0200 Subject: [PATCH 03/16] finish parts 12, 13 --- TODO.txt | 7 +--- solutions/src/main.rs | 5 +++ solutions/src/rgrep.rs | 2 +- src/main.rs | 2 ++ src/part11.rs | 16 +++++++--- src/part12.rs | 71 +++++++++++++++++++++++------------------ src/part13.rs | 60 +++++++++++++++++++++------------- workspace/src/part11.rs | 5 ++- workspace/src/part12.rs | 25 +++++++++------ workspace/src/part13.rs | 33 ++++++++++++------- 10 files changed, 139 insertions(+), 87 deletions(-) diff --git a/TODO.txt b/TODO.txt index cc3268b..ed3434c 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,8 +1,3 @@ -* Arrays/slices -* Arc, concurrency, channels: Some grep-like thing, "rgrep" -* Send, Sync -* External dependencies: regexp crate, add to rgrep - -* Shared-memoty concurrency, interior mutability: Concurrent counter +* Shared-memoty concurrency, interior mutability, Sync: Concurrent counter * Drop, unsafe: doubly-linked list diff --git a/solutions/src/main.rs b/solutions/src/main.rs index 8afd81c..a0e3f72 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -1,3 +1,8 @@ +// This crate contains solutions to *some* of the exercises, and it bundles +// the projects that span 
multiple parts together in one file per project. +// It is not always up-to-date with the code in the actual course, and mainly +// serves as draft board for new parts or exercises. + extern crate docopt; pub mod bigint; diff --git a/solutions/src/rgrep.rs b/solutions/src/rgrep.rs index 316e6f0..e64d2fd 100644 --- a/solutions/src/rgrep.rs +++ b/solutions/src/rgrep.rs @@ -134,7 +134,7 @@ fn get_options() -> Options { fn run(options: Options) { let options = Arc::new(options); - // Set up the chain of threads. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the chain of threads. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); diff --git a/src/main.rs b/src/main.rs index 0290eba..0aa0429 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,6 +79,8 @@ // * [Part 09: Iterators](part09.html) // * [Part 10: Closures](part10.html) // * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) +// * [Part 12: Concurrency, Send](part12.html) +// * [Part 13: Slices, Arrays, External Dependencies](part13.html) // * (to be continued) #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] /* extern crate docopt; */ diff --git a/src/part11.rs b/src/part11.rs index 4e5c5fd..caf866a 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -66,7 +66,10 @@ mod callbacks { //@ variable into the closure. Its environment will then contain a `usize` rather than a `&mut uszie`, and have //@ no effect on this local variable anymore. let mut count: usize = 0; - c.register(Box::new(move |val| { count = count+1; println!("Callback 2, {}. time: {}", count, val); } )); + c.register(Box::new(move |val| { + count = count+1; + println!("Callback 2, {}. 
time: {}", count, val); + } )); c.call(1); c.call(2); } } @@ -109,9 +112,12 @@ mod callbacks_clone { //@ and do the creation of the `Rc` and the conversion to `Fn(i32)` itself. //@ For this to work, we need to demand that the type `F` does not contain any short-lived borrows. After all, we will store it - //@ in our list of callbacks indefinitely. `'static` is a lifetime, the lifetime of the entire program. We can use lifetimes - //@ as bounds on types, to demand that anything in (an element of) the type lives at least as long as this lifetime. That bound was implicit in the `Box` - //@ above, and it is the reason we could not have the borrowed `count` in the closure in `demo`. + //@ in our list of callbacks indefinitely. If the closure contained a pointer to our caller's stackframe, that pointer + //@ could be invalid by the time the closure is called. We can mitigate this by bounding `F` by a *lifetime*: `T: 'a` says + //@ that all data of type `T` will *outlive* (i.e., will be valid for at least as long as) lifetime `'a`. + //@ Here, we use the special lifetime `'static`, which is the lifetime of the entire program. + //@ The same bound has been implicitly added in the version of `register` above, and in the definition of + //@ `Callbacks`. This is the reason we could not have the borrowed `count` in the closure in `demo` previously. pub fn register(&mut self, callback: F) { self.callbacks.push(Rc::new(callback)); /*@*/ } @@ -147,4 +153,4 @@ mod callbacks_clone { //@ than one version per type it is instantiated with). This makes for smaller code, but you pay the overhead of the virtual function calls. //@ Isn't it beautiful how traits can handle both of these cases (and much more, as we saw, like closures and operator overloading) nicely? 
-//@ [index](main.html) | [previous](part10.html) | [next](main.html) +//@ [index](main.html) | [previous](part10.html) | [next](part12.html) diff --git a/src/part12.rs b/src/part12.rs index 477a3ae..8a14def 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -1,18 +1,20 @@ -// Rust-101, Part 12: Concurrency (WIP) -// ================= +// Rust-101, Part 12: Concurrency, Send +// ==================================== use std::io::prelude::*; use std::{io, fs, thread}; use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; use std::sync::Arc; -//@ This part is introducing the concurrency features of Rust. We are going to write our own small version of "grep", -//@ called *rgrep*, and it is going to make use of multiple cores: One thread reads the input files, one thread does -//@ the actual matching, and one thread writes the output. +//@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", +//@ called *rgrep*, and it is going to make use of concurrency: One thread reads the input files, one thread does +//@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that +//@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common +//@ pitfall of concurrent programming: data races. // Before we come to the actual code, we define a data-structure `Options` to store all the information we need // to complete the job: Which files to work on, which pattern to look for, and how to output.
-// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +//@ Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] pub enum OutputMode { Print, @@ -29,19 +31,17 @@ pub struct Options { //@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. //@ To get the data from one thread to the next, we will use *message passing*: We will establish communication -//@ channels between the threads, with one thread *sending* data, and the other one receiving it. `SyncSender` +//@ channels between the threads, with one thread *sending* data, and the other one *receiving* it. `SyncSender` //@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here //@ means that the `send` operation could block, waiting for the other side to make progress. We don't want to -//@ end up with the entire files being stored in the buffer of the channels, and the output not being fast enough +//@ end up with the entire file being stored in the buffer of the channels, and the output not being fast enough //@ to keep up with the speed of input. //@ //@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would //@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between -//@ all threads. `Arc` is the thread-safe version of `Rc, using atomic operations to keep the reference count up-to-date. -//@ You can also think of this as saying that *all* threads own the `Options` "a bit" - and since there could be other -//@ owners, `Arc` (just like `Rc`) only permits read-only access to its content. That's good enough for the options, though. +//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. 
-// The first functions reads the files, and sends every line over the `out_channel`. +// The first function reads the files, and sends every line over the `out_channel`. fn read_files(options: Arc, out_channel: SyncSender) { for file in options.files.iter() { // First, we open the file, ignoring any errors. @@ -59,11 +59,13 @@ fn read_files(options: Arc, out_channel: SyncSender) { // The second function filters the lines it receives through `in_channel` with the pattern, and sends // matches via `out_channel`. -fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { // We can simply iterate over the channel, which will stop when the channel is closed. for line in in_channel.iter() { // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. if line.contains(&options.pattern) { out_channel.send(line).unwrap(); /*@*/ } @@ -100,7 +102,7 @@ pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); - // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); @@ -113,7 +115,9 @@ pub fn run(options: Options) { // Same with the filter thread. let options2 = options.clone(); - let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); // And the output thread. 
let options3 = options.clone(); @@ -129,45 +133,50 @@ pub fn run(options: Options) { //@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. pub fn main() { let options = Options { - files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], pattern: "let".to_string(), output_mode: Print }; run(options); } -// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. //@ ## Ownership, Borrowing, and Concurrency -//@ The little demo above showed that concurrency in Rust has a fairly simple API. However, considering Rust has closures, -//@ that should not be entirely surprising. However, as I mentioned in the beginning, Rust ensures that well-typed programs -//@ do not have data races. How can that be? A data race is typically defined as having two concurrent, unsynchronized +//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust has closures, +//@ that should not be entirely surprising. However, as it turns out, Rust goes well beyond this and actually ensures +//@ the absence of data races.
+//@ A data race is typically defined as having two concurrent, unsynchronized //@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in //@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded //@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against //@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to -//@ the string will actually be moved around (just like when a `String` is passed to a function with full ownership). However, +//@ the character data will actually be moved around (just like when a `String` is passed to a function with full ownership). However, //@ `read_files` has to *give up* ownership of the string to perform `send`, to it is impossible for an outstanding borrow to -//@ still be around. After it sent the string to the other side, `read_files` has no way to race on the data with someone else. +//@ still be around. After it sent the string to the other side, `read_files` has no pointer into the string content +//@ anymore, and hence no way to race on the data with someone else. //@ -//@ However, there is more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make -//@ sure that the callbacks to not reference any pointers that might become invalid? This is just as crucial for spawning +//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make +//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning //@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use //@ any pointers to data in that stack frame. 
This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` //@ to be valid for lifetime `'static`, as you can see in [its documentation](http://doc.rust-lang.org/stable/std/thread/fn.spawn.html). //@ This avoids another kind of data race, where the thread's access races with the callee deallocating its stack frame. +//@ It is only thanks to the concept of lifetimes that this can be expressed as part of the type of `spawn`. //@ ## Send -//@ However, the story goes further. I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations -//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` above, or the reference count may become invalid. -//@ And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something is wrong. -//@ That's great, of course, but how did it do that? +//@ However, the story goes even further. I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations +//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` across multiple threads, or the reference count may +//@ become invalid. And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something +//@ is wrong. That's great, of course, but how did it do that? //@ //@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in //@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, -//@ it's just a marker - there are no functions provided by `Send` What the trait says is that types which are `Send`, can be +//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send`, can be //@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. 
So is `Arc`, //@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! //@ @@ -176,4 +185,4 @@ pub fn main() { //@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every //@ captured variable *is* `Send`, then so is the entire environment, and you are good. -//@ [index](main.html) | [previous](part11.html) | [next](main.html) +//@ [index](main.html) | [previous](part11.html) | [next](part13.html) diff --git a/src/part13.rs b/src/part13.rs index 0121079..bd1fca7 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -1,21 +1,21 @@ // Rust-101, Part 13: Slices, Arrays, External Dependencies -// ================= +// ======================================================== //@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options //@ as argument to the program, rather than hard-coding them. Let's start with sorting. // ## Slices //@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to -//@ pass it a `Vec`. Now, sorting does not actually consume the argument, so we could make that a `&mut Vec`. +//@ pass it a `Vec`. Of course, sorting does not actually consume the argument, so we should make that a `&mut Vec`. //@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, //@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. //@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the //@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. -//@ + //@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous -//@ region of memory, where a bunch of `T` are stored. How many` We can't tell! 
This is an unsized type. Just like for -//@ trait objects, this means we can only operate on pointers to that type, and these pointers will containing the missing -//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split! +//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for +//@ trait objects, this means we can only operate on pointers to that type, and these pointers will carry the missing +//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split. //@ Our function can thus take a borrowed slice, and promise to sort all elements in there. pub fn sort(data: &mut [T]) { if data.len() < 2 { return; } @@ -24,9 +24,11 @@ pub fn sort(data: &mut [T]) { // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. let mut lpos = 1; let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. unimplemented!() } @@ -38,14 +40,14 @@ pub fn sort(data: &mut [T]) { //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed. 
let (part1, part2) = data.split_at_mut(lpos); - //@ The index operation can not only be used to address certain elements, it can also be used for "slicing": Giving a range + //@ The index operation can not only be used to address certain elements, it can also be used for *slicing*: Giving a range //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. sort(&mut part1[..lpos-1]); /*@*/ sort(part2); /*@*/ } -// **Exercise 13.2*: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part // to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! @@ -61,8 +63,8 @@ fn sort_nums(data: &mut Vec) { //@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also //@ borrow them as slices, e.g., to sort them. fn sort_array() { - let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut data); + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); } // ## External Dependencies @@ -86,8 +88,8 @@ fn sort_array() { //@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). // I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. 
+// you still have to get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the `rgrep` module. #[cfg(feature = "disabled")] pub mod rgrep { // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. @@ -95,7 +97,7 @@ pub mod rgrep { use part12::{run, Options, OutputMode}; use std::process; - // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. static USAGE: &'static str = " Usage: rgrep [-c] [-s] ... @@ -106,7 +108,12 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + //@ The function `and_then` takes a closure from `T` to `Result<U, E>`, and uses it to transform a `Result<T, E>` to a + //@ `Result<U, E>`. This way, we can chain computations that only happen if the previous one succeeded (and the error + //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. + //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but rather than panicking in + //@ case of an `Err`, it calls the closure. let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out.
let count = args.get_bool("-c"); @@ -119,15 +126,23 @@ Options: } // We need to make the strings owned to construct the `Options` instance. - //@ If you check all the type carefully, you will notice that `pattern` above if of type `&str`. `str` is the type of a UTF-8 encoded string, that is, a bunch of - //@ bytes in memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` is a sliced string, and stores the address of the character data, and - //@ their length. String literals like "this one" are of type `&'static str`: They point right to the constant section of the binary, you you cannot claim you - //@ own them. However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling `to_string` will copy the string data - //@ into an owned buffer on the heap, and thus convert it to `String`. + //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 + //@ encoded string, that is, a bunch of bytes in memory (`[u8]`) that are valid according to UTF-8. `str` is unsized. `&str` + //@ stores the address of the character data, and their length. String literals like "this one" are + //@ of type `&'static str`: They point right to the constant section of the binary, so you cannot claim you own them. + //@ However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling + //@ `to_string` will copy the string data into an owned buffer on the heap, and thus convert it to `String`.
+ let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; Options { files: files.iter().map(|file| file.to_string()).collect(), pattern: pattern.to_string(), - output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + output_mode: mode, } } @@ -141,5 +156,6 @@ Options: // **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) //@ [index](main.html) | [previous](part12.html) | [next](main.html) diff --git a/workspace/src/part11.rs b/workspace/src/part11.rs index 7e45540..cc2a252 100644 --- a/workspace/src/part11.rs +++ b/workspace/src/part11.rs @@ -41,7 +41,10 @@ mod callbacks { c.call(0); let mut count: usize = 0; - c.register(Box::new(move |val| { count = count+1; println!("Callback 2, {}. time: {}", count, val); } )); + c.register(Box::new(move |val| { + count = count+1; + println!("Callback 2, {}. 
time: {}", count, val); + } )); c.call(1); c.call(2); } } diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index 84d47ec..17e26ff 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 12: Concurrency (WIP) -// ================= +// Rust-101, Part 12: Concurrency, Send +// ==================================== use std::io::prelude::*; use std::{io, fs, thread}; @@ -9,7 +9,6 @@ use std::sync::Arc; // Before we come to the actual code, we define a data-structure `Options` to store all the information we need // to complete the job: Which files to work on, which pattern to look for, and how to output.
-// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] pub enum OutputMode { Print, @@ -25,7 +24,7 @@ pub struct Options { } -// The first functions reads the files, and sends every line over the `out_channel`. +// The first function reads the files, and sends every line over the `out_channel`. fn read_files(options: Arc, out_channel: SyncSender) { for file in options.files.iter() { // First, we open the file, ignoring any errors. @@ -43,11 +42,13 @@ fn read_files(options: Arc, out_channel: SyncSender) { // The second function filters the lines it receives through `in_channel` with the pattern, and sends // matches via `out_channel`. -fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { // We can simply iterate over the channel, which will stop when the channel is closed. for line in in_channel.iter() { // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. if line.contains(&options.pattern) { unimplemented!() } @@ -83,7 +84,7 @@ pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); - // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); @@ -93,7 +94,9 @@ pub fn run(options: Options) { // Same with the filter thread. 
let options2 = options.clone(); - let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); // And the output thread. let options3 = options.clone(); @@ -108,14 +111,16 @@ pub fn run(options: Options) { // Now we have all the pieces together for testing our rgrep with some hard-coded options. pub fn main() { let options = Options { - files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], pattern: "let".to_string(), output_mode: Print }; run(options); } -// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 3ef7785..311eba5 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -1,8 +1,9 @@ // Rust-101, Part 13: Slices, Arrays, External Dependencies -// ================= +// ======================================================== // ## Slices + pub fn sort(data: &mut [T]) { if data.len() < 2 { return; } @@ -10,9 +11,11 @@ pub fn sort(data: &mut [T]) { // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. 
let mut lpos = 1; let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. unimplemented!() } @@ -24,7 +27,7 @@ pub fn sort(data: &mut [T]) { unimplemented!() } -// **Exercise 13.2*: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part // to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! @@ -35,16 +38,16 @@ fn sort_nums(data: &mut Vec) { // ## Arrays fn sort_array() { - let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut data); + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); } // ## External Dependencies // I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. +// you still have to get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line.
Then remove the attribute of the `rgrep` module. #[cfg(feature = "disabled")] pub mod rgrep { // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. @@ -52,7 +55,7 @@ pub mod rgrep { use part12::{run, Options, OutputMode}; use std::process; - // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. static USAGE: &'static str = " Usage: rgrep [-c] [-s] ... @@ -63,7 +66,7 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out. let count = args.get_bool("-c"); @@ -76,10 +79,17 @@ Options: } // We need to make the strings owned to construct the `Options` instance. + let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; Options { files: files.iter().map(|file| file.to_string()).collect(), pattern: pattern.to_string(), - output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + output_mode: mode, } } @@ -93,4 +103,5 @@ Options: // **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). 
Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) -- 2.30.2 From 18e6fec08956d95a3fd1b4b1ef2a7bb9620c5fcf Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 Jul 2015 14:50:36 +0200 Subject: [PATCH 04/16] add some structure to the list of parts --- src/main.rs | 23 +++++++++++++++-------- src/part12.rs | 4 ++-- workspace/src/part12.rs | 4 ++-- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0aa0429..3a42ec0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,19 +58,18 @@ // Course Content // -------------- // -// The part 00-03 cover some basic of the language, to give you a feeling for Rust's syntax and pervasive -// mechanisms like pattern matching and traits. Parts 04-06 introduce the heart of the language, the ideas -// making it different from anything else out there: Ownership, borrowing, lifetimes. In part 07-??, we -// continue our tour through Rust with another example. Finally, in parts ??-??, we implement our own -// version of `grep`, exhibiting some more Rust features as we go. -// -// Now, open `your-workspace/src/part00.rs` in your favorite editor, and follow the link below for +// Open `your-workspace/src/part00.rs` in your favorite editor, and follow the link below for // the explanations and exercises. Have fun! 
// +// ### Introduction +// // * [Part 00: Algebraic datatypes](part00.html) // * [Part 01: Expressions, Inherent methods](part01.html) // * [Part 02: Generic types, Traits](part02.html) // * [Part 03: Input](part03.html) +// +// ### Basic Rust +// // * [Part 04: Ownership, Borrowing](part04.html) // * [Part 05: Clone](part05.html) // * [Part 06: Copy, Lifetimes](part06.html) @@ -79,9 +78,17 @@ // * [Part 09: Iterators](part09.html) // * [Part 10: Closures](part10.html) // * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) -// * [Part 12: Concurrency, Send](part12.html) +// +// ### Concurrency +// +// * [Part 12: Concurrency, Arc, Send](part12.html) // * [Part 13: Slices, Arrays, External Dependencies](part13.html) // * (to be continued) +// +// ### Advanced Rust +// +// * (to be continued) +// #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] /* extern crate docopt; */ mod part00; diff --git a/src/part12.rs b/src/part12.rs index 8a14def..3e959f9 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 12: Concurrency, Send -// ==================================== +// Rust-101, Part 12: Concurrency, Arc, Send +// ========================================= use std::io::prelude::*; use std::{io, fs, thread}; diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index 17e26ff..4996ac1 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 12: Concurrency, Send -// ==================================== +// Rust-101, Part 12: Concurrency, Arc, Send +// ========================================= use std::io::prelude::*; use std::{io, fs, thread}; -- 2.30.2 From ccf679adb3790903849f7d85b970b67582220952 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 Jul 2015 19:56:34 +0200 Subject: [PATCH 05/16] Add first version of part 14 --- solutions/src/counter.rs | 56 +++++++++++++++ solutions/src/main.rs | 1 + src/main.rs | 1 + src/part12.rs | 3 + 
src/part13.rs | 2 +- src/part14.rs | 143 +++++++++++++++++++++++++++++++++++++++ workspace/src/main.rs | 1 + workspace/src/part14.rs | 69 +++++++++++++++++++ 8 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 solutions/src/counter.rs create mode 100644 src/part14.rs create mode 100644 workspace/src/part14.rs diff --git a/solutions/src/counter.rs b/solutions/src/counter.rs new file mode 100644 index 0000000..265fb99 --- /dev/null +++ b/solutions/src/counter.rs @@ -0,0 +1,56 @@ +use std::sync::{Arc, RwLock}; +use std::thread; + +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(RwLock::new(val))) + } + + pub fn increment(&self, by: usize) { + let mut counter = self.0.write().unwrap(); + *counter = *counter + by; + } + + pub fn get(&self) -> usize { + let counter = self.0.read().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. 
+ handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} diff --git a/solutions/src/main.rs b/solutions/src/main.rs index a0e3f72..be6e3d5 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -8,6 +8,7 @@ extern crate docopt; pub mod bigint; pub mod vec; pub mod rgrep; +pub mod counter; pub fn main() { rgrep::main(); diff --git a/src/main.rs b/src/main.rs index 3a42ec0..55a8aa9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,6 +105,7 @@ mod part10; mod part11; mod part12; mod part13; +mod part14; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part12.rs b/src/part12.rs index 3e959f9..dc0da61 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -124,6 +124,9 @@ pub fn run(options: Options) { let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); // Finally, wait until all three threads did their job. + //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this case, we could get + //@ access to the data that it provided to `panic!`. Here, we just assert that they did not panic - so we will panic ourselves + //@ if that happened. handle1.join().unwrap(); handle2.join().unwrap(); handle3.join().unwrap(); diff --git a/src/part13.rs b/src/part13.rs index bd1fca7..811411c 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -69,7 +69,7 @@ fn sort_array() { // ## External Dependencies //@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on -//@ [`std::env::args`](http://doc.rust-lang.org/beta/std/env/fn.args.html) to gain access to those arguments, and this would become +//@ [`std::env::args`](http://doc.rust-lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become //@ a pretty boring lesson in string manipulation. 
Instead, I want to use this opportunity to show how easy it is to benefit from //@ other people's work in your program. //@ diff --git a/src/part14.rs b/src/part14.rs new file mode 100644 index 0000000..32f0fcd --- /dev/null +++ b/src/part14.rs @@ -0,0 +1,143 @@ +// Rust-101, Part 14: Mutex, Sync (WIP) +// ============================== + +use std::sync::{Arc, Mutex}; +use std::thread; + +//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide everybody +//@ with *read-only* access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, +//@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever +//@ someone changes the data, nobody else is working on it. In other words, we need a *critical section* or (as it +//@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. +//@ +//@ As an example, let us write a concurrent counter. As usual, we first have to think about our data-structure in Rust. +//@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, +//@ a `Mutex` protects data, not code. This is generally considered good style, but other languages typically lack +//@ the ability to actually enforce this. As we will see, it is impossible to forget to acquire the mutex in Rust. +//@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. +//@ +//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar +//@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing +//@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc<Mutex<usize>>`.
However, +//@ it is a locally declared type, so we can give it an inherent implementation and implement traits for it. Since the +//@ field is private, nobody outside this module can even know the type we are wrapping. + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(Mutex::new(val))) + } + + //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? + //@ How can this be, since `increment` definitely modifies the counter? We already discussed above that `Mutex` is + //@ a way to get around this restriction in Rust. This phenomenon of data that can be mutated through a shared + //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, + //@ this does not count as "mutation". This stands in contrast to *exterior mutability*, which is the kind of + //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar + //@ with languages like ML, you can compare this to how something of type `ref` permit mutation, even though it is + //@ itself a functional value (more precisely, a location) like all the others. + //@ + //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. + //@ The reason that this still works is careful programming of the primitives for interior mutability - in this case, that's + //@ `Mutex`. It has to ensure with dynamic checks, at run-time, that things don't fall apart. In particular, it has to ensure + //@ that the data covered by the mutex can only ever be accessed from inside a critical section.
This is where Rust's type + //@ system comes into play: With its discipline of ownership and borrowing, it can enforce such rules. Let's see how this goes. + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. + let mut counter = self.0.lock().unwrap(); + *counter = *counter + by; + //@ At the end of the function, `counter` is dropped and the mutex is available again. + //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us + //@ to borrow some of its content, release the lock of the mutex, and subsequently access the protected data without holding + //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry + //@ about data races *even though* we are mutating shared memory! + //@ + //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the + //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will thus fail. + //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned + //@ state and attempt to recover from it. + } + + pub fn get(&self) -> usize { + let counter = self.0.lock().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. 
+ let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. + +//@ ## Sync +//@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* +//@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. +//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) +//@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like +//@ sending full ownership. +//@ +//@ But what about `&T`, a shared borrow? Without interior mutability, it would always be all-right to send such values. +//@ After one, no mutation can be performed, so there can be as many threads accessing the data as we like. In the +//@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for +//@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. 
Just like `Send`, `Sync` has a default implementation +//@ and is thus automatically implemented for a data-structure *if* all its members implement it. +//@ +//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared borrow is good enough +//@ for cloning, and we don't want other threads to clone our local `Rc`, so it must not be `Sync`. The rule of `Mutex` +//@ is to enforce synchronization, so it should not be entirely surprising that `Mutex` is `Send` *and* `Sync` provided that +//@ `T` is `Send`. +//@ +//@ There's also an example of a type that's `Send`, but not `Sync`: [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html). +//@ This type is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. +//@ One can also see it as a dynamically checked version of Rust's usual borrowing rules. You have to explicitly say +//@ when you want to borrow the data in there shared, or mutably, and Rust will complain at run-time if you have +//@ a mutable borrow while any other borrow is active. You can then write programs that Rust may otherwise not +//@ accept. Sending a shared borrow to this to another thread is dangerous, as the checks are not performed in +//@ a thread-safe manner. However, sending the *entire* `RefCell` is okay, because there's only ever one owner, and all +//@ we need to ensure is that everybody attempting to borrow is in the same thread as the owner. +//@ +//@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples +//@ of such types, but that's not a topic I want to go into. In case you are curious, there's a +//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. 
+//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. + +//@ [index](main.html) | [previous](part13.html) | [next](main.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index 7e7c200..26e9eed 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -15,6 +15,7 @@ mod part10; mod part11; mod part12; mod part13; +mod part14; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part14.rs b/workspace/src/part14.rs new file mode 100644 index 0000000..37afcbc --- /dev/null +++ b/workspace/src/part14.rs @@ -0,0 +1,69 @@ +// Rust-101, Part 14: Mutex, Sync (WIP) +// ============================== + +use std::sync::{Arc, Mutex}; +use std::thread; + + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(Mutex::new(val))) + } + + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + let mut counter = self.0.lock().unwrap(); + *counter = *counter + by; + } + + pub fn get(&self) -> usize { + let counter = self.0.lock().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. 
+ let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. 
+ + -- 2.30.2 From 0223210576f27d0743c2d12b890d30f5c2ef6b2d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 16 Jul 2015 17:13:11 +0200 Subject: [PATCH 06/16] finish part 14 --- src/main.rs | 6 ++--- src/part11.rs | 4 ++-- src/part13.rs | 4 ++-- src/part14.rs | 51 ++++++++++++++++++++--------------------- src/part15.rs | 25 ++++++++++++++++++++ workspace/src/main.rs | 1 + workspace/src/part13.rs | 2 +- workspace/src/part14.rs | 19 ++++++++------- workspace/src/part15.rs | 4 ++++ 9 files changed, 73 insertions(+), 43 deletions(-) create mode 100644 src/part15.rs create mode 100644 workspace/src/part15.rs diff --git a/src/main.rs b/src/main.rs index 55a8aa9..901e70d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -78,15 +78,12 @@ // * [Part 09: Iterators](part09.html) // * [Part 10: Closures](part10.html) // * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) -// -// ### Concurrency -// // * [Part 12: Concurrency, Arc, Send](part12.html) // * [Part 13: Slices, Arrays, External Dependencies](part13.html) -// * (to be continued) // // ### Advanced Rust // +// * [Part 14: Mutex, Interior Mutability, Sync](part14.html) // * (to be continued) // #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] @@ -106,6 +103,7 @@ mod part11; mod part12; mod part13; mod part14; +mod part15; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part11.rs b/src/part11.rs index caf866a..cfe6c20 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -91,8 +91,8 @@ mod callbacks_clone { //@ this pointer is smart: It has a reference count. You can `clone` an `Rc` as often as you want, that doesn't affect the //@ data it contains at all. It only creates more references to the same data. Once all the references are gone, the data is deleted. //@ - //@ Wait a moment, you may say here. Multiple references to the same data? That's aliasing! Indeed, we have to be careful. 
- //@ Once data is stored in an `Rc`, it is read-only: By dereferencing the smart `Rc`, you can only get a shared borrow of the data. + //@ Wait a moment, you may say here. Multiple references to the same data? That's aliasing! Indeed: + //@ Once data is stored in an `Rc`, it is read-only. By dereferencing the smart `Rc`, you can only get a shared borrow of the data. use std::rc::Rc; //@ Because of this read-only restriction, we cannot use `FnMut` here: We'd be unable to call the function with a mutable borrow diff --git a/src/part13.rs b/src/part13.rs index 811411c..d6483d4 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -149,7 +149,7 @@ Options: // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! pub fn main() { - run(get_options()); + run(get_options()); /*@*/ } } @@ -158,4 +158,4 @@ Options: // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. // (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) -//@ [index](main.html) | [previous](part12.html) | [next](main.html) +//@ [index](main.html) | [previous](part12.html) | [next](part14.html) diff --git a/src/part14.rs b/src/part14.rs index 32f0fcd..596094b 100644 --- a/src/part14.rs +++ b/src/part14.rs @@ -1,19 +1,19 @@ -// Rust-101, Part 14: Mutex, Sync (WIP) -// ============================== +// Rust-101, Part 14: Mutex, Interior Mutability, Sync +// =================================================== use std::sync::{Arc, Mutex}; use std::thread; -//@ We already saw that we can use `Arc` to share memory between threads. 
However, `Arc` can only provide everybody -//@ with *read-only* to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, +//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only* +//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, //@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever -//@ someone changes the data, nobody else is working on it. In other words, we need a *critical section* or (as it +//@ someone changes the data, nobody else is looking at it. In other words, we need a *critical section* or (as it //@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. //@ -//@ As an example, let us write a concurrent counter. As usual, we first have to think about our data-structure in Rust. +//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout. //@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, -//@ a `Mutex` protects data, not code. This is generally considered good style, but other languages typically lack -//@ the ability to actually enforce this. As we will see, it is impossible to forget to acquire the mutex in Rust. +//@ a `Mutex` protects data, not code - and it is impossible to access the data in any other way. This is generally considered +//@ good style, but other languages typically lack the ability to actually enforce this. //@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. 
//@ //@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar @@ -27,9 +27,9 @@ use std::thread; struct ConcurrentCounter(Arc>); impl ConcurrentCounter { - // The constructor should not be surprising. + // The constructor just wraps the constructors of `Arc` and `Mutex`. pub fn new(val: usize) -> Self { - ConcurrentCounter(Arc::new(Mutex::new(val))) + ConcurrentCounter(Arc::new(Mutex::new(val))) /*@*/ } //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? @@ -38,7 +38,7 @@ impl ConcurrentCounter { //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, //@ this does not count as "mutation". This stands in contrast to *exterior mutability*, which is the kind of //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar - //@ with languages like ML, you can compare this to how something of type `ref` permit mutation, even though it is + //@ with languages like ML, you can compare this to how something of type `ref` permits mutation, even though it is //@ itself a functional value (more precisely, a location) like all the others. //@ //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. @@ -50,6 +50,8 @@ impl ConcurrentCounter { // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. let mut counter = self.0.lock().unwrap(); + //@ The guard is another example of a smart pointer, and it can be used as if it were a pointer to the data protected + //@ by the lock. *counter = *counter + by; //@ At the end of the function, `counter` is dropped and the mutex is available again. 
//@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us @@ -58,14 +60,15 @@ impl ConcurrentCounter { //@ about data races *even though* we are mutating shared memory! //@ //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the - //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will thus fail. + //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will fail. //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned //@ state and attempt to recover from it. } + // The function `get` returns the current value of the counter. pub fn get(&self) -> usize { - let counter = self.0.lock().unwrap(); - *counter + let counter = self.0.lock().unwrap(); /*@*/ + *counter /*@*/ } } @@ -91,13 +94,13 @@ pub fn main() { } }); - // Now we want to watch the threads working on the counter. + // Now we watch the threads working on the counter. for _ in 0..50 { thread::sleep_ms(5); println!("Current value: {}", counter.get()); } - // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. handle1.join().unwrap(); handle2.join().unwrap(); println!("Final value: {}", counter.get()); @@ -107,16 +110,19 @@ pub fn main() { // provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one // for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change // the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. 
+// +// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// `by` *only if* the current value is `test`. //@ ## Sync //@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* //@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. //@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) //@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like -//@ sending full ownership. +//@ sending full ownership, in the sense that it can be used to move the object to another thread. //@ //@ But what about `&T`, a shared borrow? Without interior mutability, it would always be all-right to send such values. -//@ After one, no mutation can be performed, so there can be as many threads accessing the data as we like. In the +//@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the //@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for //@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation //@ and is thus automatically implemented for a data-structure *if* all its members implement it. @@ -126,14 +132,7 @@ pub fn main() { //@ is to enforce synchronization, so it should not be entirely surprising that `Mutex` is `Send` *and* `Sync` provided that //@ `T` is `Send`. //@ -//@ There's also an example of a type that's `Send`, but not `Sync`: [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html). -//@ This type is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. 
-//@ One can also see it as a dynamically checked version of Rust's usual borrowing rules. You have to explicitly say -//@ when you want to borrow the data in there shared, or mutably, and Rust will complain at run-time if you have -//@ a mutable borrow while any other borrow is active. You can then write programs that Rust may otherwise not -//@ accept. Sending a shared borrow to this to another thread is dangerous, as the checks are not performed in -//@ a thread-safe manner. However, sending the *entire* `RefCell` is okay, because there's only ever one owner, and all -//@ we need to ensure is that everybody attempting to borrow is in the same thread as the owner. +//@ In the next part, we will learn about a type called `RefCell` that is `Send`, but not `Sync`. //@ //@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples //@ of such types, but that's not a topic I want to go into. In case you are curious, there's a diff --git a/src/part15.rs b/src/part15.rs new file mode 100644 index 0000000..7365421 --- /dev/null +++ b/src/part15.rs @@ -0,0 +1,25 @@ +// Rust-101, Part 15: Interior Mutability (cont.), RefCell, Cell, Drop +// =================================================================== + +//@ [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html) +//@ is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. +//@ One can also see it as a dynamically checked version of Rust's usual borrowing rules. You have to explicitly say +//@ when you want to borrow the data in there shared, or mutably, and Rust will complain at run-time if you have +//@ a mutable borrow while any other borrow is active. You can then write programs that Rust may otherwise not +//@ accept. Sending a shared borrow of this to another thread is dangerous, as the checks are not performed in +//@ a thread-safe manner.
However, sending the *entire* `RefCell` is okay, because there's only ever one owner, and all +//@ we need to ensure is that everybody attempting to borrow is in the same thread as the owner.
+//@ [`Cell<T>`](http://doc.rust-lang.org/beta/std/cell/struct.Cell.html) is like a stripped-down version of `RefCell`: It doesn't allow +//@ you to borrow its content. Instead, it has the methods `set` and `get` to change the value stored in the cell, and to copy it out. +//@ For obvious reasons, this requires `T` to be `Copy`. +//@ +//@ You can also think about all these types coming from the other end: Starting with `Cell`, we have a primitive for +//@ interior mutability that provides `get` and `set`, both just requiring a shared borrow. Think of these functions as +//@ mutating the *content* of the cell, but not the cell itself, the container. (Just like in ML, where assignment to a +//@ `ref` changes the content, not the location.) However, due to the ownership discipline, `Cell` only works for types +//@ that are `Copy`. Hence we also have `RefCell`, which allows working with the data right in the cell, rather than +//@ having to copy it out. `RefCell` uses non-atomic operations for this purpose, so for the multi-threaded setting, there's +//@ the thread-safe `RwLock`. And finally, in case a distinction between readers and writers is not helpful, one can use the +//@ more efficient `Mutex`. + +//@ [index](main.html) | [previous](part14.html) | [next](main.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index 26e9eed..a86a65a 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -16,6 +16,7 @@ mod part11; mod part12; mod part13; mod part14; +mod part15; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 311eba5..88d1ea6 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -96,7 +96,7 @@ Options: // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function.
// You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! pub fn main() { - run(get_options()); + unimplemented!() } } diff --git a/workspace/src/part14.rs b/workspace/src/part14.rs index 37afcbc..d939265 100644 --- a/workspace/src/part14.rs +++ b/workspace/src/part14.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 14: Mutex, Sync (WIP) -// ============================== +// Rust-101, Part 14: Mutex, Interior Mutability, Sync +// =================================================== use std::sync::{Arc, Mutex}; use std::thread; @@ -10,9 +10,9 @@ use std::thread; struct ConcurrentCounter(Arc>); impl ConcurrentCounter { - // The constructor should not be surprising. + // The constructor just wraps the constructors of `Arc` and `Mutex`. pub fn new(val: usize) -> Self { - ConcurrentCounter(Arc::new(Mutex::new(val))) + unimplemented!() } pub fn increment(&self, by: usize) { @@ -21,9 +21,9 @@ impl ConcurrentCounter { *counter = *counter + by; } + // The function `get` returns the current value of the counter. pub fn get(&self) -> usize { - let counter = self.0.lock().unwrap(); - *counter + unimplemented!() } } @@ -49,13 +49,13 @@ pub fn main() { } }); - // Now we want to watch the threads working on the counter. + // Now we watch the threads working on the counter. for _ in 0..50 { thread::sleep_ms(5); println!("Current value: {}", counter.get()); } - // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. handle1.join().unwrap(); handle2.join().unwrap(); println!("Final value: {}", counter.get()); @@ -65,5 +65,8 @@ pub fn main() { // provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one // for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. 
mutable borrows.) Change // the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. +// +// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// `by` *only if* the current value is `test`. diff --git a/workspace/src/part15.rs b/workspace/src/part15.rs new file mode 100644 index 0000000..4dca116 --- /dev/null +++ b/workspace/src/part15.rs @@ -0,0 +1,4 @@ +// Rust-101, Part 15: Interior Mutability (cont.), RefCell, Cell, Drop +// =================================================================== + + -- 2.30.2 From 98dafe0138b8bf6584b8d9e86a74a580bb034a26 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 16 Jul 2015 22:29:52 +0200 Subject: [PATCH 07/16] turns out that one *can* use 'extern crate' in submodules --- Cargo.lock | 59 ----------------------------------------- Cargo.toml | 4 +-- src/main.rs | 1 - src/part13.rs | 10 +++---- workspace/src/part13.rs | 10 +++---- 5 files changed, 12 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67ade17..b44a287 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,63 +1,4 @@ [root] name = "rust-101" version = "0.1.0" -dependencies = [ - "docopt 0.6.67 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "aho-corasick" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "docopt" -version = "0.6.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "regex 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "libc" -version = "0.1.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "memchr" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex-syntax" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "rustc-serialize" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "strsim" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index e590353..bbc666b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,5 +3,5 @@ name = "rust-101" version = "0.1.0" authors = ["Ralf Jung "] -[dependencies] -docopt = "*" +#[dependencies] +#docopt = "*" diff --git a/src/main.rs b/src/main.rs index 901e70d..4290e1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -87,7 +87,6 @@ // * (to be continued) // #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] -/* extern crate docopt; */ mod part00; mod part01; mod part02; diff --git a/src/part13.rs b/src/part13.rs index d6483d4..29647ea 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -87,13 +87,13 @@ fn sort_array() { //@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local //@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). 
-// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the `rgrep` module. +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. +// Remove the attribute of the `rgrep` module to enable compilation. #[cfg(feature = "disabled")] pub mod rgrep { - // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. - use docopt::Docopt; + // Now that `docopt` is linked, we can first root it in the namespace and then import it with `use`. We also import some other pieces that we will need. + extern crate docopt; + use self::docopt::Docopt; use part12::{run, Options, OutputMode}; use std::process; diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 88d1ea6..2be4353 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -45,13 +45,13 @@ fn sort_array() { // ## External Dependencies -// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the `rgrep` module. +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. +// Remove the attribute of the `rgrep` module to enable compilation. #[cfg(feature = "disabled")] pub mod rgrep { - // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. 
We also import some other pieces that we will need. - use docopt::Docopt; + // Now that `docopt` is linked, we can first root it in the namespace and then import it with `use`. We also import some other pieces that we will need. + extern crate docopt; + use self::docopt::Docopt; use part12::{run, Options, OutputMode}; use std::process; -- 2.30.2 From 188b1ec1b8528e2326791feccc8077e15bd60182 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 18 Jul 2015 16:42:15 +0200 Subject: [PATCH 08/16] split part 11 into two, and explain interior mutability and Cell and RefCell in the new part 12 --- solutions/src/callbacks.rs | 66 ++++++++ solutions/src/counter.rs | 9 +- solutions/src/main.rs | 1 + src/main.rs | 15 +- src/part11.rs | 228 +++++++++++--------------- src/part12.rs | 316 +++++++++++++++++------------------- src/part13.rs | 322 ++++++++++++++++++++----------------- src/part14.rs | 273 ++++++++++++++++--------------- src/part15.rs | 145 ++++++++++++++++- workspace/src/part11.rs | 118 +++++--------- workspace/src/part12.rs | 198 +++++++++++------------ workspace/src/part13.rs | 205 ++++++++++++----------- workspace/src/part14.rs | 143 +++++++++------- workspace/src/part15.rs | 76 ++++++++- 14 files changed, 1197 insertions(+), 918 deletions(-) create mode 100644 solutions/src/callbacks.rs diff --git a/solutions/src/callbacks.rs b/solutions/src/callbacks.rs new file mode 100644 index 0000000..93fcb17 --- /dev/null +++ b/solutions/src/callbacks.rs @@ -0,0 +1,66 @@ +use std::rc::Rc; +use std::cell::RefCell; + +#[derive(Clone)] +pub struct Callbacks { + callbacks: Vec>>, +} + +impl Callbacks { + pub fn new() -> Self { + Callbacks { callbacks: Vec::new() } /*@*/ + } + + pub fn register(&mut self, callback: F) { + let cell = Rc::new(RefCell::new(callback)); + self.callbacks.push(cell); /*@*/ + } + + pub fn call(&self, val: i32) { + for callback in self.callbacks.iter() { + // We have to *explicitly* borrow the contents of a `RefCell`. 
+ //@ At run-time, the cell will keep track of the number of outstanding shared and mutable borrows, + //@ and panic if the rules are violated. Since this function is the only one that borrows the + //@ environments of the closures, and this function requires a *mutable* borrow of `self`, we know this cannot + //@ happen.
+ //@ For this check to be performed, `closure` is a *guard*: Rather than a normal borrow, `borrow_mut` returns + //@ a smart pointer (`RefMut`, in this case) that waits until it goes out of scope, and then + //@ appropriately updates the number of active borrows. + //@ + //@ The function would still typecheck with an immutable borrow of `self` (since we are + //@ relying on the interior mutability of `self`), but then it could happen that a callback + //@ will in turn trigger another round of callbacks, so that `call` would indirectly call itself. + //@ This is called reentrancy. It would imply that we borrow the closure a second time, and + //@ panic at run-time. I hope this also makes it clear that there's absolutely no hope of Rust + //@ performing these checks statically, at compile-time: It would have to detect reentrancy! + let mut closure = callback.borrow_mut(); + // Unfortunately, Rust's auto-dereference of pointers is not clever enough here. We thus have to explicitly + // dereference the smart pointer and obtain a mutable borrow of the target. + (&mut *closure)(val); + } + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + use std::cell::RefCell; + use super::*; + + #[test] + #[should_panic] + fn test_reentrant() { + let c = Rc::new(RefCell::new(Callbacks::new())); + c.borrow_mut().register(|val| println!("Callback called: {}", val) ); + + // If we change the two "borrow" below to "borrow_mut", you can get a panic even with a "call" that requires a + // mutable borrow. However, that panic is then triggered by our own, external `RefCell` (so it's kind of our fault), + // rather than being triggered by the `RefCell` in the `Callbacks`. 
+ { + let c2 = c.clone(); + c.borrow_mut().register(move |val| c2.borrow().call(val+val) ); + } + + c.borrow().call(42); + } +} \ No newline at end of file diff --git a/solutions/src/counter.rs b/solutions/src/counter.rs index 265fb99..afea9d0 100644 --- a/solutions/src/counter.rs +++ b/solutions/src/counter.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock}; use std::thread; #[derive(Clone)] -struct ConcurrentCounter(Arc>); +pub struct ConcurrentCounter(Arc>); impl ConcurrentCounter { // The constructor should not be surprising. @@ -15,6 +15,13 @@ impl ConcurrentCounter { *counter = *counter + by; } + pub fn compare_and_inc(&self, test: usize, by: usize) { + let mut counter = self.0.write().unwrap(); + if *counter == test { + *counter += by; + } + } + pub fn get(&self) -> usize { let counter = self.0.read().unwrap(); *counter diff --git a/solutions/src/main.rs b/solutions/src/main.rs index be6e3d5..0242f49 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -9,6 +9,7 @@ pub mod bigint; pub mod vec; pub mod rgrep; pub mod counter; +pub mod callbacks; pub fn main() { rgrep::main(); diff --git a/src/main.rs b/src/main.rs index 4290e1e..1111443 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,9 +36,9 @@ // --------------- // // You will need to have Rust installed, of course. It is available for download on -// [the Rust website](http://www.rust-lang.org/). You should go for either the "stable" -// or the "beta" channel. More detailed installation instructions are provided in -// [the second chapter of The Book](https://doc.rust-lang.org/stable/book/installing-rust.html). +// [the Rust website](http://www.rust-lang.org/). Make sure you get at least version 1.2 +// (at the time of writing, that's the current beta release). More detailed installation +// instructions are provided in [the second chapter of The Book](https://doc.rust-lang.org/stable/book/installing-rust.html). 
// This will also install `cargo`, the tool responsible for building rust projects (or *crates*). // Next, fetch the Rust-101 source code from the [git repository](http://www.ralfj.de/git/rust-101.git) @@ -77,13 +77,14 @@ // * [Part 08: Associated Types, Modules](part08.html) // * [Part 09: Iterators](part09.html) // * [Part 10: Closures](part10.html) -// * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) -// * [Part 12: Concurrency, Arc, Send](part12.html) -// * [Part 13: Slices, Arrays, External Dependencies](part13.html) // // ### Advanced Rust // -// * [Part 14: Mutex, Interior Mutability, Sync](part14.html) +// * [Part 11: Trait Objects, Box, Lifetime bounds](part11.html) +// * [Part 12: Rc, Interior Mutability, Cell, RefCell](part12.html) +// * [Part 13: Concurrency, Arc, Send](part13.html) +// * [Part 14: Slices, Arrays, External Dependencies](part14.html) +// * [Part 15: Mutex, Interior Mutability (cont.), Sync](part15.html) // * (to be continued) // #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] diff --git a/src/part11.rs b/src/part11.rs index cfe6c20..5cc1462 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -1,156 +1,120 @@ -// Rust-101, Part 11: Trait Objects, Box, Rc, Lifetime bounds -// ========================================================== +// Rust-101, Part 11: Trait Objects, Box, Lifetime bounds +// ====================================================== //@ We will play around with closures a bit more. Let us implement some kind of generic "callback" -//@ mechanism, providing two functions: Registering a new callback, and calling all registered callbacks. There will be two -//@ versions, so to avoid clashes of names, we put them into modules. -mod callbacks { - //@ First of all, we need to find a way to store the callbacks. Clearly, there will be a `Vec` involved, so that we can - //@ always grow the number of registered callbacks. 
A callback will be a closure, i.e., something implementing - //@ `FnMut(i32)` (we want to call this multiple times, so clearly `FnOnce` would be no good). So our first attempt may be the following. - // For now, we just decide that the callbacks have an argument of type `i32`. - struct CallbacksV1 { - callbacks: Vec, - } - //@ However, this will not work. Remember how the "type" of a closure is specific to the environment of captured variables. Different closures - //@ all implementing `FnMut(i32)` may have different types. However, a `Vec` is a *uniformly typed* vector. - - //@ We will thus need a way to store things of *different* types in the same vector. We know all these types implement `FnMut(i32)`. For this scenario, - //@ Rust provides *trait objects*: The truth is, `FnMut(i32)` is not just a trait. It is also a type, that can be given to anything implementing - //@ this trait. So, we may write the following. - /* struct CallbacksV2 { - callbacks: Vec, - } */ - //@ But, Rust complains about this definition. It says something about "Sized". What's the trouble? See, for many things we want to do, it is crucial that - //@ Rust knows the precise, fixed size of the type - that is, how large this type will be when represented in memory. For example, for a `Vec`, the - //@ elements are stored one right after the other. How should that be possible, without a fixed size? The trouble is, `FnMut(i32)` could be of any size. - //@ We don't know how large that "type that implemenets `FnMut(i32)`" is. Rust calls this an *unsized* type. Whenever we introduce a type variable, Rust - //@ will implicitly add a bound to that variable, demanding that it is sized. That's why we did not have to worry about this so far.
- //@ You can opt-out of this implicit bound by saying `T: ?Sized`. Then `T` may or may not be sized. - - //@ So, what can we do, if we can't store the callbacks in a vector? We can put them in a box. Semantically, `Box` is a lot like `T`: You fully own - //@ the data stored there. On the machine, however, `Box` is a *pointer* to `T`. It is a lot like `std::unique_ptr` in C++. In our current example, - //@ the important bit is that since it's a pointer, `T` can be unsized, but `Box` itself will always be sized. So we can put it in a `Vec`. - pub struct Callbacks { - callbacks: Vec>, - } - - impl Callbacks { - // Now we can provide some functions. The constructor should be straight-forward. - pub fn new() -> Self { - Callbacks { callbacks: Vec::new() } /*@*/ - } - - // Registration simply stores the callback. - pub fn register(&mut self, callback: Box) { - self.callbacks.push(callback); /*@*/ - } - - // And here we call all the stored callbacks. - pub fn call(&mut self, val: i32) { - // Since they are of type `FnMut`, we need to mutably iterate. Notice that boxes dereference implicitly. - for callback in self.callbacks.iter_mut() { - callback(val); /*@*/ - } - } - } - - // Now we are ready for the demo. - pub fn demo(c: &mut Callbacks) { - c.register(Box::new(|val| println!("Callback 1: {}", val))); - c.call(0); - - //@ We can even register callbacks that modify their environment. Rust will again attempt to borrow `count`. However, - //@ that doesn't work out this time: Since we want to put this thing in a `Box`, it could live longer than the function - //@ we are in. Then the borrow of `count` would become invalid. We have to explicitly tell Rust to `move` ownership of the - //@ variable into the closure. Its environment will then contain a `usize` rather than a `&mut uszie`, and have - //@ no effect on this local variable anymore. - let mut count: usize = 0; - c.register(Box::new(move |val| { - count = count+1; - println!("Callback 2, {}. 
time: {}", count, val); - } )); - c.call(1); c.call(2); - } +//@ mechanism, providing two functions: Registering a new callback, and calling all registered callbacks. + +//@ First of all, we need to find a way to store the callbacks. Clearly, there will be a `Vec` involved, so that we can +//@ always grow the number of registered callbacks. A callback will be a closure, i.e., something implementing +//@ `FnMut(i32)` (we want to call this multiple times, so clearly `FnOnce` would be no good). So our first attempt may be the following. +// For now, we just decide that the callbacks have an argument of type `i32`. +struct CallbacksV1 { + callbacks: Vec, } - -// Remember to edit `main.rs` to run the demo. -pub fn main() { - let mut c = callbacks::Callbacks::new(); - callbacks::demo(&mut c); +//@ However, this will not work. Remember how the "type" of a closure is specific to the environment of captured variables. Different closures +//@ all implementing `FnMut(i32)` may have different types. However, a `Vec` is a *uniformly typed* vector. + +//@ We will thus need a way to store things of *different* types in the same vector. We know all these types implement `FnMut(i32)`. For this scenario, +//@ Rust provides *trait objects*: The truth is, `FnMut(i32)` is not just a trait. It is also a type, that can be given to anything implementing +//@ this trait. So, we may write the following. +/* struct CallbacksV2 { + callbacks: Vec, +} */ +//@ But, Rust complains about this definition. It says something about "Sized". What's the trouble? See, for many things we want to do, it is crucial that +//@ Rust knows the precise, fixed size of the type - that is, how large this type will be when represented in memory. For example, for a `Vec`, the +//@ elements are stored one right after the other. How should that be possible, without a fixed size? The point is, `FnMut(i32)` could be of any size. 
+//@ We don't know how large that "type that implements `FnMut(i32)`" is. 
Rust calls this an *unsized* type. Whenever we introduce a type variable, Rust +//@ will implicitly add a bound to that variable, demanding that it is sized. That's why we did not have to worry about this so far.
+//@ You can opt-out of this implicit bound by saying `T: ?Sized`. Then `T` may or may not be sized. + +//@ So, what can we do, if we can't store the callbacks in a vector? We can put them in a box. Semantically, `Box` is a lot like `T`: You fully own +//@ the data stored there. On the machine, however, `Box` is a *pointer* to a heap-allocated `T`. It is a lot like `std::unique_ptr` in C++. In our current example, +//@ the important bit is that since it's a pointer, `T` can be unsized, but `Box` itself will always be sized. So we can put it in a `Vec`. +pub struct Callbacks { + callbacks: Vec>, } -mod callbacks_clone { - //@ So, this worked great, didn't it! There's one point though that I'd like to emphasize: One cannot `clone` a closure. - //@ Hence it becomes impossible to implement `Clone` for our `Callbacks` type. What could we do about this? - - //@ You already learned about `Box` above. `Box` is an example of a *smart pointer*: It's like a pointer (in the C - //@ sense), but with some additional smarts to it. For `Box`, that's the part about ownership. Once you drop the box, the - //@ content it points to will be deleted.
- //@ Another example of a smart pointer is `Rc`. This is short for *reference-counter*, so you can already guess how - //@ this pointer is smart: It has a reference count. You can `clone` an `Rc` as often as you want, that doesn't affect the - //@ data it contains at all. It only creates more references to the same data. Once all the references are gone, the data is deleted. - //@ - //@ Wait a moment, you may say here. Multiple references to the same data? That's aliasing! Indeed: - //@ Once data is stored in an `Rc`, it is read-only. By dereferencing the smart `Rc`, you can only get a shared borrow of the data. - use std::rc::Rc; - - //@ Because of this read-only restriction, we cannot use `FnMut` here: We'd be unable to call the function with a mutable borrow - //@ of it's environment! So we have to go with `Fn`. We wrap that in an `Rc`, and then Rust happily derives `Clone` for us. - #[derive(Clone)] - pub struct Callbacks { - callbacks: Vec>, +impl Callbacks { + // Now we can provide some functions. The constructor should be straight-forward. + pub fn new() -> Self { + Callbacks { callbacks: Vec::new() } /*@*/ } - impl Callbacks { - pub fn new() -> Self { - Callbacks { callbacks: Vec::new() } /*@*/ - } + // Registration simply stores the callback. + pub fn register(&mut self, callback: Box) { + self.callbacks.push(callback); /*@*/ + } - // For the `register` function, we don't actually have to use trait objects in the argument. - //@ We can make this function generic, such that it will be instantiated with some concrete closure type `F` - //@ and do the creation of the `Rc` and the conversion to `Fn(i32)` itself. - - //@ For this to work, we need to demand that the type `F` does not contain any short-lived borrows. After all, we will store it - //@ in our list of callbacks indefinitely. If the closure contained a pointer to our caller's stackframe, that pointer - //@ could be invalid by the time the closure is called. 
We can mitigate this by bounding `F` by a *lifetime*: `T: 'a` says - //@ that all data of type `T` will *outlive* (i.e., will be valid for at least as long as) lifetime `'a`. - //@ Here, we use the special lifetime `'static`, which is the lifetime of the entire program. - //@ The same bound has been implicitly added in the version of `register` above, and in the definition of - //@ `Callbacks`. This is the reason we could not have the borrowed `count` in the closure in `demo` previously. - pub fn register(&mut self, callback: F) { - self.callbacks.push(Rc::new(callback)); /*@*/ - } + // We can also write a generic version of `register`, such that it will be instantiated with some concrete closure type `F` + // and do the creation of the `Box` and the conversion from `F` to `FnMut(i32)` itself. + + //@ For this to work, we need to demand that the type `F` does not contain any short-lived borrows. After all, we will store it + //@ in our list of callbacks indefinitely. If the closure contained a pointer to our caller's stackframe, that pointer + //@ could be invalid by the time the closure is called. We can mitigate this by bounding `F` by a *lifetime*: `F: 'a` says + //@ that all data of type `F` will *outlive* (i.e., will be valid for at least as long as) lifetime `'a`. + //@ Here, we use the special lifetime `'static`, which is the lifetime of the entire program. + //@ The same bound has been implicitly added in the version of `register` above, and in the definition of + //@ `Callbacks`. + pub fn register_generic(&mut self, callback: F) { + self.callbacks.push(Box::new(callback)); /*@*/ + } - pub fn call(&mut self, val: i32) { - // We only need a shared iterator here. `Rc` also implicitly dereferences, so we can simply call the callback. - for callback in self.callbacks.iter() { - callback(val); /*@*/ - } + // And here we call all the stored callbacks. + pub fn call(&mut self, val: i32) { + // Since they are of type `FnMut`, we need to mutably iterate. 
+ for callback in self.callbacks.iter_mut() { + //@ Here, `callback` has type `&mut Box`. We can make use of the fact that `Box` is a *smart pointer*: In + //@ particular, we can use it as if it were a normal pointer, and use `*` to get to its contents. Then we mutably borrow + //@ these contents, because we call a `FnMut`. + (&mut *callback)(val); /*@*/ + //@ Just like it is the case with normal borrows, this typically happens implicitly, so we can also directly call the function. + //@ Try removing the `&mut *`. + //@ + //@ The difference to a normal pointer is that `Box` implies ownership: Once you drop the box (i.e., when the entire `Callbacks` instance is + //@ dropped), the content it points to on the heap will be deleted. } } +} - // The demo works just as above. Our counting callback doesn't work anymore though, because we are using `Fn` now. - fn demo(c: &mut Callbacks) { - c.register(|val| println!("Callback 1: {}", val)); - c.call(0); c.call(1); +// Now we are ready for the demo. Remember to edit `main.rs` to run it. +pub fn main() { + let mut c = Callbacks::new(); + c.register(Box::new(|val| println!("Callback 1: {}", val))); + c.call(0); + + { + //@ We can even register callbacks that modify their environment. Per default, Rust will attempt to borrow `count`. However, + //@ that doesn't work out this time. Remember the `'static` bound above? Borrowing `count` in the environment would + //@ violate that bound, as the borrow is only valid for this block. If the callbacks are triggered later, we'd be in trouble. + //@ We have to explicitly tell Rust to `move` ownership of the variable into the closure. Its environment will then contain a + //@ `usize` rather than a `&mut usize`, and the closure has no effect on this local variable anymore. + let mut count: usize = 0; + c.register_generic(move |val| { + count = count+1; + println!("Callback 2: {} ({}. 
time)", val, count); + } ); } + c.call(1); c.call(2); } -// **Exercise 11.1**: We made the arbitrary choice of using `i32` for the arguments. Generalize the data-structures above -// to work with an arbitrary type `T` that's passed to the callbacks. Since you need to call multiple callbacks with the -// same `t: T`, you will either have to restrict `T` to `Copy` types, or pass a borrow. - //@ ## Run-time behavior //@ When you run the program above, how does Rust know what to do with the callbacks? Since an unsized type lacks some information, -//@ a *pointer* to such a type (be it a `Box`, an `Rc` or a borrow) will need to complete this information. We say that pointers to +//@ a *pointer* to such a type (be it a `Box` or a borrow) will need to complete this information. We say that pointers to //@ trait objects are *fat*. They store not only the address of the object, but (in the case of trait objects) also a *vtable*: A //@ table of function pointers, determining the code that's run when a trait method is called. There are some restrictions for traits to be usable //@ as trait objects. This is called *object safety* and described in [the documentation](http://doc.rust-lang.org/stable/book/trait-objects.html) and [the reference](http://doc.rust-lang.org/reference.html#trait-objects). +//@ In case of the `FnMut` trait, there's only a single action to be performed: Calling the closure. You can thus think of a pointer to `FnMut` as +//@ a pointer to the code, and a pointer to the environment. This is how Rust recovers the typical encoding of closures as a special case of a more +//@ general concept. //@ -//@ Whenever you write a generic function, you have a choice: You can make it polymorphic, like our `vec_min`. Or you -//@ can use trait objects, like the first `register` above. The latter will result in only a single compiled version (rather +//@ Whenever you write a generic function, you have a choice: You can make it generic, like `register_generic`. 
Or you +//@ can use trait objects, like `register`. The latter will result in only a single compiled version (rather //@ than one version per type it is instantiated with). This makes for smaller code, but you pay the overhead of the virtual function calls. -//@ Isn't it beautiful how traits can handle both of these cases (and much more, as we saw, like closures and operator overloading) nicely? +//@ (Of course, in the case of `register` above, there's no function called on the trait object.) +//@ Isn't it beautiful how traits can nicely handle this tradeoff (and much more, as we saw, like closures and operator overloading)? + +// **Exercise 11.1**: We made the arbitrary choice of using `i32` for the arguments. Generalize the data-structures above +// to work with an arbitrary type `T` that's passed to the callbacks. Since you need to call multiple callbacks with the +// same `t: T`, you will either have to restrict `T` to `Copy` types, or pass a borrow. //@ [index](main.html) | [previous](part10.html) | [next](part12.html) diff --git a/src/part12.rs b/src/part12.rs index dc0da61..c749865 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -1,191 +1,163 @@ -// Rust-101, Part 12: Concurrency, Arc, Send -// ========================================= - -use std::io::prelude::*; -use std::{io, fs, thread}; -use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; -use std::sync::Arc; - -//@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", -//@ called *rgrep*, and it is going to make use of concurrency: One thread reads the input files, one thread does -//@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that -//@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common -//@ pitfall of concurrent programming: data races. 
- -// Before we come to the actual code, we define a data-structure `Options` to store all the information we need -// to complete the job: Which files to work on, which pattern to look for, and how to output.
-//@ Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. -#[derive(Clone,Copy)] -pub enum OutputMode { - Print, - SortAndPrint, - Count, -} -use self::OutputMode::*; +// Rust-101, Part 12: Rc, Interior Mutability, Cell, RefCell +// ========================================================= + +use std::rc::Rc; +use std::cell::{Cell, RefCell}; -pub struct Options { - pub files: Vec, - pub pattern: String, - pub output_mode: OutputMode, +//@ Our generic callback mechanism is already working quite nicely. However, there's one point we may want to fix: +//@ `Callbacks` does not implement `Clone`. The problem is that closures (or rather, their environment) can never be cloned. +//@ (There's not even an automatic derivation happening for the cases where it would be possible.) +//@ This restriction propagates up to `Callbacks` itself. What could we do about this? + +//@ The solution is to find some way of cloning `Callbacks` without cloning the environments. This can be achieved with +//@ `Rc`, a *reference-counted* pointer. This is another example of a smart pointer. You can `clone` an `Rc` as often +//@ as you want, that doesn't affect the data it contains. It only creates more references to the same data. Once all the +//@ references are gone, the data is deleted. +//@ +//@ Wait a moment, you may say here. Multiple references to the same data? That's aliasing! Indeed: +//@ Once data is stored in an `Rc`, it is read-only and you can only ever get a shared borrow of the data again. + +//@ Because of this read-only restriction, we cannot use `FnMut` here: We'd be unable to call the function with a mutable borrow +//@ of its environment! So we have to go with `Fn`. We wrap that in an `Rc`, and then Rust happily derives `Clone` for us. +#[derive(Clone)] +struct Callbacks { + callbacks: Vec>, } -//@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. 
-//@ To get the data from one thread to the next, we will use *message passing*: We will establish communication -//@ channels between the threads, with one thread *sending* data, and the other one *receiving* it. `SyncSender` -//@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here -//@ means that the `send` operation could block, waiting for the other side to make progress. We don't want to -//@ end up with the entire file being stored in the buffer of the channels, and the output not being fast enough -//@ to keep up with the speed of input. -//@ -//@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would -//@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between -//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. - -// The first function reads the files, and sends every line over the `out_channel`. -fn read_files(options: Arc, out_channel: SyncSender) { - for file in options.files.iter() { - // First, we open the file, ignoring any errors. - let file = fs::File::open(file).unwrap(); - // Then we obtain a `BufReader` for it, which provides the `lines` function. - let file = io::BufReader::new(file); - for line in file.lines() { - let line = line.unwrap(); - // Now we send the line over the channel, ignoring the possibility of `send` failing. - out_channel.send(line).unwrap(); - } +impl Callbacks { + pub fn new() -> Self { + Callbacks { callbacks: Vec::new() } /*@*/ } - // When we drop the `out_channel`, it will be closed, which the other end can notice. -} -// The second function filters the lines it receives through `in_channel` with the pattern, and sends -// matches via `out_channel`. 
-fn filter_lines(options: Arc, - in_channel: Receiver, - out_channel: SyncSender) { - // We can simply iterate over the channel, which will stop when the channel is closed. - for line in in_channel.iter() { - // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. This is another example of Rust using traits as substitute for overloading. - if line.contains(&options.pattern) { - out_channel.send(line).unwrap(); /*@*/ - } + // Registration works just like last time, except that we are creating an `Rc` now. + pub fn register(&mut self, callback: F) { + self.callbacks.push(Rc::new(callback)); /*@*/ } -} -// The third function performs the output operations, receiving the relevant lines on its `in_channel`. -fn output_lines(options: Arc, in_channel: Receiver) { - match options.output_mode { - Print => { - // Here, we just print every line we see. - for line in in_channel.iter() { - println!("{}", line); /*@*/ - } - }, - Count => { - // We are supposed to count the number of matching lines. There's a convenient iterator adapter that - // we can use for this job. - let count = in_channel.iter().count(); /*@*/ - println!("{} hits for {}.", count, options.pattern); /*@*/ - }, - SortAndPrint => { - // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... - let mut data: Vec = in_channel.iter().collect(); - // ...and implement the actual sorting later. - unimplemented!() + pub fn call(&self, val: i32) { + // We only need a shared iterator here. Since `Rc` is a smart pointer, we can directly call the callback. + for callback in self.callbacks.iter() { + callback(val); /*@*/ } } } -// With the operations of the three threads defined, we can now implement a function that performs grepping according -// to some given options. -pub fn run(options: Options) { - // We move the `options` into an `Arc`, as that's what the thread workers expect. 
- let options = Arc::new(options); - - // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. - let (line_sender, line_receiver) = sync_channel(16); - let (filtered_sender, filtered_receiver) = sync_channel(16); - - // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. - //@ The `move` keyword again tells Rust that we want ownership of captured variables to be moved into the - //@ closure. This means we need to do the `clone` *first*, otherwise we would lose our `options` to the - //@ new thread! - let options1 = options.clone(); - let handle1 = thread::spawn(move || read_files(options1, line_sender)); - - // Same with the filter thread. - let options2 = options.clone(); - let handle2 = thread::spawn(move || { - filter_lines(options2, line_receiver, filtered_sender) - }); - - // And the output thread. - let options3 = options.clone(); - let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); - - // Finally, wait until all three threads did their job. - //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this case, we could get - //@ access to the data that it provided to `panic!`. Here, we just assert that they did not panic - so we will panic ourselves - //@ if that happened. - handle1.join().unwrap(); - handle2.join().unwrap(); - handle3.join().unwrap(); +// Time for a demo! +fn demo(c: &mut Callbacks) { + c.register(|val| println!("Callback 1: {}", val)); + c.call(0); c.clone().call(1); } -// Now we have all the pieces together for testing our rgrep with some hard-coded options. -//@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. 
pub fn main() { - let options = Options { - files: vec!["src/part10.rs".to_string(), - "src/part11.rs".to_string(), - "src/part12.rs".to_string()], - pattern: "let".to_string(), - output_mode: Print - }; - run(options); + let mut c = Callbacks::new(); + demo(&mut c); } -// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file -// and the number of the line in the file. You will have to change the type of the channels from `String` to something -// that records this extra information. - -//@ ## Ownership, Borrowing, and Concurrency -//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust has closures, -//@ that should not be entirely surprising. However, as it turns out, Rust goes well beyond this and actually ensures -//@ the absence of data races.
-//@ A data race is typically defined as having two concurrent, unsynchronized -//@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in -//@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded -//@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against -//@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to -//@ the character data will actually be moved around (just like when a `String` is passed to a function with full ownership). However, -//@ `read_files` has to *give up* ownership of the string to perform `send`, to it is impossible for an outstanding borrow to -//@ still be around. After it sent the string to the other side, `read_files` has no pointer into the string content -//@ anymore, and hence no way to race on the data with someone else. -//@ -//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make -//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning -//@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use -//@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` -//@ to be valid for lifetime `'static`, as you can see in [its documentation](http://doc.rust-lang.org/stable/std/thread/fn.spawn.html). -//@ This avoids another kind of data race, where the thread's access races with the callee deallocating its stack frame. -//@ It is only thanks to the concept of lifetimes that this can be expressed as part of the type of `spawn`. - -//@ ## Send -//@ However, the story goes even further. 
I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations -//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` across multiple threads, or the reference count may -//@ become invalid. And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something -//@ is wrong. That's great, of course, but how did it do that? -//@ -//@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in -//@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, -//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send`, can be -//@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, -//@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! +// ## Interior Mutability +//@ Of course, the counting example from last time does not work anymore: It needs to mutate the environment, which a `Fn` +//@ cannot do. The strict borrowing Rules of Rust are getting into our way. However, when it comes to mutating a mere number +//@ (`usize`), there's not really any chance of problems coming up. Everybody can read and write that variable just as they want. +//@ So it would be rather sad if we were not able to write this program. Lucky enough, Rust's standard library provides a +//@ solution in the form of `Cell`. This type represents a memory cell of some type `T`, providing the two basic operations +//@ `get` and `set`. `get` returns a *copy* of the content of the cell, so all this works only if `T` is `Copy`. +//@ `set`, which overrides the content, only needs a *shared borrow* of the cell. 
The phenomenon of a type that permits mutation through +//@ shared borrows (i.e., mutation despite the possibility of aliasing) is called *interior mutability*. You can think +//@ of `set` changing only the *contents* of the cell, not its *identity*. In contrast, the kind of mutation we saw so far was +//@ about replacing one piece of data by something else of the same type. This is called *exterior mutability*.
+//@ Notice that it is impossible to *borrow* the contents of the cell, and that is actually the key to why this is safe. + +// So, let us put our counter in a `Cell`, and replicate the example from the previous part. +fn demo_cell(c: &mut Callbacks) { + { + let count = Cell::new(0); + // Again, we have to move ownership of the `count` into the environment closure. + c.register(move |val| { + // In here, all we have is a shared borrow of our environment. But that's good enough for the `get` and `set` of the cell! + //@ At run-time, the `Cell` will be almost entirely compiled away, so this becomes pretty much equivalent to the version + //@ we wrote in the previous part. + let new_count = count.get()+1; + count.set(new_count); + println!("Callback 2: {} ({}. time)", val, new_count); + } ); + } + + c.call(2); c.clone().call(3); +} + +//@ It is worth mentioning that `Rc` itself also has to make use of interior mutability: When you `clone` an `Rc`, all it has available +//@ is a shared borrow. However, it has to increment the reference count! Internally, `Rc` uses `Cell` for the count, such that it +//@ can be updated during `clone`. + +// ## `RefCell` +//@ As the next step in the evolution of `Callbacks`, we could try to solve this problem of mutability once and for all, by adding `Cell` +//@ to `Callbacks` such that clients don't have to worry about this. However, that won't end up working: Remember that `Cell` only works +//@ with types that are `Copy`, which the environment of a closure will never be. We need a variant of `Cell` that allows borrowing its +//@ contents, such that we can provide a `FnMut` with its environment. But if `Cell` would allow that, we could write down all those +//@ crashing C++ programs that we wanted to get rid of. //@ -//@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This means that *every type* implements -//@ `Send`, unless it opts out.
Opting out is viral: If your type contains a type that opted out, then you don't have `Send`, either. -//@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every -//@ captured variable *is* `Send`, then so is the entire environment, and you are good. +//@ This is the point where our program got too complex for Rust to guarantee at compile-time that nothing bad will happen. Since we don't +//@ want to give up the safety guarantee, we are going to need some code that actually checks at run-time that the borrowing rules +//@ are not violated. Such a check is provided by `RefCell`: Unlike `Cell`, this lets us borrow the contents, and it works for +//@ non-`Copy` `T`. But, as we will see, it incurs some run-time overhead. + +// Our final version of `Callbacks` puts the closure environment into a `RefCell`. +#[derive(Clone)] +struct CallbacksMut { + callbacks: Vec>>, +} + +impl CallbacksMut { + pub fn new() -> Self { + CallbacksMut { callbacks: Vec::new() } /*@*/ + } + + pub fn register(&mut self, callback: F) { + let cell = Rc::new(RefCell::new(callback)); + self.callbacks.push(cell); /*@*/ + } + + pub fn call(&mut self, val: i32) { + for callback in self.callbacks.iter() { + // We have to *explicitly* borrow the contents of a `RefCell` by calling `borrow` or `borrow_mut`. + //@ At run-time, the cell will keep track of the number of outstanding shared and mutable borrows, + //@ and panic if the rules are violated.
+ //@ For this check to be performed, `closure` is a *guard*: Rather than a normal borrow, `borrow_mut` returns + //@ a smart pointer (`RefMut`, in this case) that waits until it goes out of scope, and then + //@ appropriately updates the number of active borrows. + //@ + //@ Since `call` is the only place that borrows the environments of the closures, we should expect that + //@ the check will always succeed. However, this function would still typecheck with an immutable borrow of `self` (since we are + //@ relying on the interior mutability of `RefCell`). Under this condition, it could happen that a callback + //@ will in turn trigger another round of callbacks, so that `call` would indirectly call itself. + //@ This is called reentrancy. It would imply that we borrow the closure a second time, and + //@ panic at run-time. I hope this also makes it clear that there's absolutely no hope of Rust + //@ performing these checks statically, at compile-time: It would have to detect reentrancy! + let mut closure = callback.borrow_mut(); + // Unfortunately, Rust's auto-dereference of pointers is not clever enough here. We thus have to explicitly + // dereference the smart pointer and obtain a mutable borrow of the content. + (&mut *closure)(val); + } + } +} + +// Now we can repeat the demo from the previous part - but this time, our `CallbacksMut` type +// can be cloned. +fn demo_mut(c: &mut CallbacksMut) { + c.register(|val| println!("Callback 1: {}", val)); + c.call(0); + + { + let mut count: usize = 0; + c.register(move |val| { + count = count+1; + println!("Callback 2: {} ({}. time)", val, count); + } ); + } + c.call(1); c.clone().call(2); +} + +// **Exercise 12.1**: Change the type of `call` to ask only for a shared borrow.
Then write some piece of code using only the available, public +// interface of `CallbacksMut` such that a reentrant call to `call` is happening, and the program aborts because the `RefCell` refuses to hand +// out a second mutable borrow to its content. //@ [index](main.html) | [previous](part11.html) | [next](part13.html) diff --git a/src/part13.rs b/src/part13.rs index 29647ea..76d7154 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -1,161 +1,191 @@ -// Rust-101, Part 13: Slices, Arrays, External Dependencies -// ======================================================== - -//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options -//@ as argument to the program, rather than hard-coding them. Let's start with sorting. - -// ## Slices -//@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to -//@ pass it a `Vec`. Of course, sorting does not actually consume the argument, so we should make that a `&mut Vec`. -//@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, -//@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. -//@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the -//@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. - -//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous -//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for -//@ trait objects, this means we can only operate on pointers to that type, and these pointers will carry the missing -//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split. 
-//@ Our function can thus take a borrowed slice, and promise to sort all elements in there. -pub fn sort(data: &mut [T]) { - if data.len() < 2 { return; } - - // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, - // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. - let mut lpos = 1; - let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; - [rpos,len) is >= pivot; lpos < rpos */ - loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a - // test function for `sort`. - unimplemented!() - } - - // Once our cursors met, we need to put the pivot in the right place. - data.swap(0, lpos-1); - - // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: - //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one at the beginning and - //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. - //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed. - let (part1, part2) = data.split_at_mut(lpos); - //@ The index operation can not only be used to address certain elements, it can also be used for *slicing*: Giving a range - //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from - //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. 
- sort(&mut part1[..lpos-1]); /*@*/ - sort(part2); /*@*/ +// Rust-101, Part 13: Concurrency, Arc, Send +// ========================================= + +use std::io::prelude::*; +use std::{io, fs, thread}; +use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; +use std::sync::Arc; + +//@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", +//@ called *rgrep*, and it is going to make use of concurrency: One thread reads the input files, one thread does +//@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that +//@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common +//@ pitfall of concurrent programming: data races. + +// Before we come to the actual code, we define a data-structure `Options` to store all the information we need +// to complete the job: Which files to work on, which pattern to look for, and how to output.
+//@ Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +#[derive(Clone,Copy)] +pub enum OutputMode { + Print, + SortAndPrint, + Count, } +use self::OutputMode::*; -// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part -// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line -// only, not by filename or line number! - -// Now, we can sort, e.g., an vector of numbers. -fn sort_nums(data: &mut Vec) { - //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which means we want to slice the entire vector. - //@ It is then passed to the `sort` function, which doesn't even know that it is working on data inside a vector. - sort(&mut data[..]); +pub struct Options { + pub files: Vec, + pub pattern: String, + pub output_mode: OutputMode, } -// ## Arrays -//@ An *array* in Rust is given be the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; 10]` is an array of 10 floating-point -//@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also -//@ borrow them as slices, e.g., to sort them. -fn sort_array() { - let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut array_of_data); +//@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. +//@ To get the data from one thread to the next, we will use *message passing*: We will establish communication +//@ channels between the threads, with one thread *sending* data, and the other one *receiving* it. `SyncSender` +//@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here +//@ means that the `send` operation could block, waiting for the other side to make progress. 
We don't want to +//@ end up with the entire file being stored in the buffer of the channels, and the output not being fast enough +//@ to keep up with the speed of input. +//@ +//@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would +//@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between +//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. + +// The first function reads the files, and sends every line over the `out_channel`. +fn read_files(options: Arc, out_channel: SyncSender) { + for file in options.files.iter() { + // First, we open the file, ignoring any errors. + let file = fs::File::open(file).unwrap(); + // Then we obtain a `BufReader` for it, which provides the `lines` function. + let file = io::BufReader::new(file); + for line in file.lines() { + let line = line.unwrap(); + // Now we send the line over the channel, ignoring the possibility of `send` failing. + out_channel.send(line).unwrap(); + } + } + // When we drop the `out_channel`, it will be closed, which the other end can notice. } -// ## External Dependencies -//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on -//@ [`std::env::args`](http://doc.rust-lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become -//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from -//@ other people's work in your program. -//@ -//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. Someone must have written a library -//@ for the job, right? Indeed, someone has. Rust has a central repository of published libraries, called [crates.io](https://crates.io/). 
-//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): Everybody can upload their code, -//@ and there's tooling for importing that code into your project. This tooling is provided by `cargo`, the tool we are already using to -//@ build this tutorial. (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the documentation](http://doc.crates.io/crates-io.html) for more details.) -//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which creates a parser for command-line -//@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file. - -//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace -//@ now, and enabled the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, -//@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on. -//@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local -//@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). - -// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. -// Remove the attribute of the `rgrep` module to enable compilation. -#[cfg(feature = "disabled")] -pub mod rgrep { - // Now that `docopt` is linked, we can first root it in the namespace and then import it with `use`. We also import some other pieces that we will need. - extern crate docopt; - use self::docopt::Docopt; - use part12::{run, Options, OutputMode}; - use std::process; - - // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. 
- static USAGE: &'static str = " -Usage: rgrep [-c] [-s] ... - -Options: - -c, --count Count number of matching lines (rather than printing them). - -s, --sort Sort the lines before printing. -"; - - // This function extracts the rgrep options from the command-line arguments. - fn get_options() -> Options { - // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). - //@ The function `and_then` takes a closure from `T` to `Result`, and uses it to transform a `Result` to a - //@ `Result`. This way, we can chain computations that only happen if the previous one succeeded (and the error - //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. - //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but rather than panicking in - //@ case of an `Err`, it calls the closure. - let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); - // Now we can get all the values out. - let count = args.get_bool("-c"); - let sort = args.get_bool("-s"); - let pattern = args.get_str(""); - let files = args.get_vec(""); - if count && sort { - println!("Setting both '-c' and '-s' at the same time does not make any sense."); - process::exit(1); +// The second function filters the lines it receives through `in_channel` with the pattern, and sends +// matches via `out_channel`. +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { + // We can simply iterate over the channel, which will stop when the channel is closed. + for line in in_channel.iter() { + // `contains` works on lots of types of patterns, but in particular, we can use it to test whether + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. 
+ if line.contains(&options.pattern) { + out_channel.send(line).unwrap(); /*@*/ } + } +} - // We need to make the strings owned to construct the `Options` instance. - //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 - //@ encoded string, that is, a bunch of bytes in memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` - //@ stores the address of the character data, and their length. String literals like "this one" are - //@ of type `&'static str`: They point right to the constant section of the binary, so - //@ However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling - //@ `to_string` will copy the string data into an owned buffer on the heap, and thus convert it to `String`. - let mode = if count { - OutputMode::Count - } else if sort { - OutputMode::SortAndPrint - } else { - OutputMode::Print - }; - Options { - files: files.iter().map(|file| file.to_string()).collect(), - pattern: pattern.to_string(), - output_mode: mode, +// The third function performs the output operations, receiving the relevant lines on its `in_channel`. +fn output_lines(options: Arc, in_channel: Receiver) { + match options.output_mode { + Print => { + // Here, we just print every line we see. + for line in in_channel.iter() { + println!("{}", line); /*@*/ + } + }, + Count => { + // We are supposed to count the number of matching lines. There's a convenient iterator adapter that + // we can use for this job. + let count = in_channel.iter().count(); /*@*/ + println!("{} hits for {}.", count, options.pattern); /*@*/ + }, + SortAndPrint => { + // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... + let mut data: Vec = in_channel.iter().collect(); + // ...and implement the actual sorting later. 
+ unimplemented!() } } +} - // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. - // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! - pub fn main() { - run(get_options()); /*@*/ - } +// With the operations of the three threads defined, we can now implement a function that performs grepping according +// to some given options. +pub fn run(options: Options) { + // We move the `options` into an `Arc`, as that's what the thread workers expect. + let options = Arc::new(options); + + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + let (line_sender, line_receiver) = sync_channel(16); + let (filtered_sender, filtered_receiver) = sync_channel(16); + + // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. + //@ The `move` keyword again tells Rust that we want ownership of captured variables to be moved into the + //@ closure. This means we need to do the `clone` *first*, otherwise we would lose our `options` to the + //@ new thread! + let options1 = options.clone(); + let handle1 = thread::spawn(move || read_files(options1, line_sender)); + + // Same with the filter thread. + let options2 = options.clone(); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); + + // And the output thread. + let options3 = options.clone(); + let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); + + // Finally, wait until all three threads did their job. + //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this case, we could get + //@ access to the data that it provided to `panic!`. Here, we just assert that they did not panic - so we will panic ourselves + //@ if that happened. 
+ handle1.join().unwrap(); + handle2.join().unwrap(); + handle3.join().unwrap(); +} + +// Now we have all the pieces together for testing our rgrep with some hard-coded options. +//@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. +pub fn main() { + let options = Options { + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], + pattern: "let".to_string(), + output_mode: Print + }; + run(options); } -// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular -// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch -// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. -// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file +// and the number of the line in the file. You will have to change the type of the channels from `String` to something +// that records this extra information. + +//@ ## Ownership, Borrowing, and Concurrency +//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust has closures, +//@ that should not be entirely surprising. However, as it turns out, Rust goes well beyond this and actually ensures +//@ the absence of data races.
+//@ A data race is typically defined as having two concurrent, unsynchronized +//@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in +//@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded +//@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against +//@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to +//@ the character data will actually be moved around (just like when a `String` is passed to a function with full ownership). However, +//@ `read_files` has to *give up* ownership of the string to perform `send`, so it is impossible for an outstanding borrow to +//@ still be around. After it sent the string to the other side, `read_files` has no pointer into the string content +//@ anymore, and hence no way to race on the data with someone else. +//@ +//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make +//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning +//@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use +//@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` +//@ to be valid for lifetime `'static`, as you can see in [its documentation](http://doc.rust-lang.org/stable/std/thread/fn.spawn.html). +//@ This avoids another kind of data race, where the thread's access races with the caller deallocating its stack frame. +//@ It is only thanks to the concept of lifetimes that this can be expressed as part of the type of `spawn`. + +//@ ## Send +//@ However, the story goes even further.
I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations +//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` across multiple threads, or the reference count may +//@ become invalid. And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something +//@ is wrong. That's great, of course, but how did it do that? +//@ +//@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in +//@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, +//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send`, can be +//@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, +//@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! +//@ +//@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This means that *every type* implements +//@ `Send`, unless it opts out. Opting out is viral: If your type contains a type that opted out, then you don't have `Send`, either. +//@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every +//@ captured variable *is* `Send`, then so is the entire environment, and you are good. 
//@ [index](main.html) | [previous](part12.html) | [next](part14.html) diff --git a/src/part14.rs b/src/part14.rs index 596094b..6550fe5 100644 --- a/src/part14.rs +++ b/src/part14.rs @@ -1,142 +1,161 @@ -// Rust-101, Part 14: Mutex, Interior Mutability, Sync -// =================================================== +// Rust-101, Part 14: Slices, Arrays, External Dependencies +// ======================================================== -use std::sync::{Arc, Mutex}; -use std::thread; +//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options +//@ as argument to the program, rather than hard-coding them. Let's start with sorting. -//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only* -//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, -//@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever -//@ someone changes the data, nobody else is looking at it. In other words, we need a *critical section* or (as it -//@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. -//@ -//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout. -//@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, -//@ a `Mutex` protects data, not code - and it is impossible to access the data in any other way. This is generally considered -//@ good style, but other languages typically lack the ability to actually enforce this. -//@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. 
-//@ -//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar -//@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing -//@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc>`. However, -//@ is is a locally declared types, so we can give it an inherent implementation and implement traits for it. Since the -//@ field is private, nobody outside this module can even know the type we are wrapping. - -// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. -#[derive(Clone)] -struct ConcurrentCounter(Arc>); - -impl ConcurrentCounter { - // The constructor just wraps the constructors of `Arc` and `Mutex`. - pub fn new(val: usize) -> Self { - ConcurrentCounter(Arc::new(Mutex::new(val))) /*@*/ - } +// ## Slices +//@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to +//@ pass it a `Vec`. Of course, sorting does not actually consume the argument, so we should make that a `&mut Vec`. +//@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, +//@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. +//@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the +//@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. - //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? - //@ How can this be, since `increment` definitely modifies the counter? We already discussed above that `Mutex` is - //@ a way to get around this restriction in Rust. 
This phenomenon of data that can be mutated through a shared - //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, - //@ this does not count as "mutation". This stands in contrast to *exterior mutability*, which is the kind of - //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar - //@ with languages like ML, you can compare this to how something of type `ref` permits mutation, even though it is - //@ itself a functional value (more precisely, a location) like all the others. - //@ - //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. - //@ The reason that this still works is careful programming of the primitives for interior mutability - in this case, that's - //@ `Mutex`. It has to ensure with dynamic checks, at run-time, that things don't fall apart. In particular, it has to ensure - //@ that the data covered by the mutex can only ever be accessed from inside a critical section. This is where Rust's type - //@ system comes into play: With its discipline of ownership and borrowing, it can enforce such rules. Let's see how this goes. - pub fn increment(&self, by: usize) { - // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. - //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. - let mut counter = self.0.lock().unwrap(); - //@ The guard is another example of a smart pointer, and it can be used as if it were a pointer to the data protected - //@ by the lock. - *counter = *counter + by; - //@ At the end of the function, `counter` is dropped and the mutex is available again. - //@ This can only happen when full ownership of the guard is given up. 
In particular, it is impossible for us - //@ to borrow some of its content, release the lock of the mutex, and subsequently access the protected data without holding - //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry - //@ about data races *even though* we are mutating shared memory! - //@ - //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the - //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will fail. - //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned - //@ state and attempt to recover from it. - } +//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous +//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for +//@ trait objects, this means we can only operate on pointers to that type, and these pointers will carry the missing +//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split. +//@ Our function can thus take a borrowed slice, and promise to sort all elements in there. +pub fn sort(data: &mut [T]) { + if data.len() < 2 { return; } - // The function `get` returns the current value of the counter. - pub fn get(&self) -> usize { - let counter = self.0.lock().unwrap(); /*@*/ - *counter /*@*/ + // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, + // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. 
+ let mut lpos = 1; + let mut rpos = data.len(); + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ + loop { + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. + unimplemented!() } + + // Once our cursors met, we need to put the pivot in the right place. + data.swap(0, lpos-1); + + // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: + //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one at the beginning and + //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. + //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed. + let (part1, part2) = data.split_at_mut(lpos); + //@ The index operation can not only be used to address certain elements, it can also be used for *slicing*: Giving a range + //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from + //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. + sort(&mut part1[..lpos-1]); /*@*/ + sort(part2); /*@*/ } -// Now our counter is ready for action. -pub fn main() { - let counter = ConcurrentCounter::new(0); +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// only, not by filename or line number! - // We clone the counter for the first thread, which increments it by 2 every 15ms. 
- let counter1 = counter.clone(); - let handle1 = thread::spawn(move || { - for _ in 0..10 { - thread::sleep_ms(15); - counter1.increment(2); - } - }); - - // The second thread increments the counter by 3 every 20ms. - let counter2 = counter.clone(); - let handle2 = thread::spawn(move || { - for _ in 0..10 { - thread::sleep_ms(20); - counter2.increment(3); +// Now, we can sort, e.g., a vector of numbers. +fn sort_nums(data: &mut Vec) { + //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which means we want to slice the entire vector. + //@ It is then passed to the `sort` function, which doesn't even know that it is working on data inside a vector. + sort(&mut data[..]); +} + +// ## Arrays +//@ An *array* in Rust is given by the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; 10]` is an array of 10 floating-point +//@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also +//@ borrow them as slices, e.g., to sort them. +fn sort_array() { + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); +} + +// ## External Dependencies +//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on +//@ [`std::env::args`](http://doc.rust-lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become +//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from +//@ other people's work in your program. +//@ +//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. Someone must have written a library +//@ for the job, right? Indeed, someone has. Rust has a central repository of published libraries, called [crates.io](https://crates.io/).
+//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): Everybody can upload their code, +//@ and there's tooling for importing that code into your project. This tooling is provided by `cargo`, the tool we are already using to +//@ build this tutorial. (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the documentation](http://doc.crates.io/crates-io.html) for more details.) +//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which creates a parser for command-line +//@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file. + +//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace +//@ now, and enable the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, +//@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on. +//@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local +//@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). + +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. +// Remove the attribute of the `rgrep` module to enable compilation. +#[cfg(feature = "disabled")] +pub mod rgrep { + // Now that `docopt` is linked, we can first add it to the namespace and then import shorter names with `use`. We also import some other pieces that we will need. + extern crate docopt; + use self::docopt::Docopt; + use part12::{run, Options, OutputMode}; + use std::process; + + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse.
+ static USAGE: &'static str = " +Usage: rgrep [-c] [-s] ... + +Options: + -c, --count Count number of matching lines (rather than printing them). + -s, --sort Sort the lines before printing. +"; + + // This function extracts the rgrep options from the command-line arguments. + fn get_options() -> Options { + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + //@ The function `and_then` takes a closure from `T` to `Result<U, E>`, and uses it to transform a `Result<T, E>` to a + //@ `Result<U, E>`. This way, we can chain computations that only happen if the previous one succeeded (and the error + //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. + //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but rather than panicking in + //@ case of an `Err`, it calls the closure. + let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); + // Now we can get all the values out. + let count = args.get_bool("-c"); + let sort = args.get_bool("-s"); + let pattern = args.get_str(""); + let files = args.get_vec(""); + if count && sort { + println!("Setting both '-c' and '-s' at the same time does not make any sense."); + process::exit(1); } - }); - // Now we watch the threads working on the counter. - for _ in 0..50 { - thread::sleep_ms(5); - println!("Current value: {}", counter.get()); + // We need to make the strings owned to construct the `Options` instance. + //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 + //@ encoded string, that is, a bunch of bytes in memory (`[u8]`) that are valid according to UTF-8. `str` is unsized. `&str` + //@ stores the address of the character data, and their length.
String literals like "this one" are + //@ of type `&'static str`: They point right to the constant section of the binary, so they are never deallocated. + //@ The borrow is therefore valid for as long as the program runs, hence it has lifetime `'static`. Calling + //@ `to_string` will copy the string data into an owned buffer on the heap, and thus convert it to `String`. + let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; + Options { + files: files.iter().map(|file| file.to_string()).collect(), + pattern: pattern.to_string(), + output_mode: mode, + } } - // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. - handle1.join().unwrap(); - handle2.join().unwrap(); - println!("Final value: {}", counter.get()); + // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. + // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! + pub fn main() { + run(get_options()); /*@*/ + } } -// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which -// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one -// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change -// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -// -// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by -// `by` *only if* the current value is `test`. - -//@ ## Sync -//@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread.
However, we did *not* -//@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. -//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) -//@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like -//@ sending full ownership, in the sense that it can be used to move the object to another thread. -//@ -//@ But what about `&T`, a shared borrow? Without interior mutability, it would always be all-right to send such values. -//@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the -//@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for -//@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation -//@ and is thus automatically implemented for a data-structure *if* all its members implement it. -//@ -//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared borrow is good enough -//@ for cloning, and we don't want other threads to clone our local `Rc`, so it must not be `Sync`. The rule of `Mutex` -//@ is to enforce synchronization, so it should not be entirely surprising that `Mutex` is `Send` *and* `Sync` provided that -//@ `T` is `Send`. -//@ -//@ In the next part, we will learn about a type called `RefCell` that is `Send`, but not `Sync`. -//@ -//@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples -//@ of such types, but that's not a topic I want to go into. In case you are curious, there's a -//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. 
-//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. +// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch +// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) -//@ [index](main.html) | [previous](part13.html) | [next](main.html) +//@ [index](main.html) | [previous](part13.html) | [next](part15.html) diff --git a/src/part15.rs b/src/part15.rs index 7365421..a783689 100644 --- a/src/part15.rs +++ b/src/part15.rs @@ -1,5 +1,145 @@ -// Rust-101, Part 15: Interior Mutability (cont.), RefCell, Cell, Drop -// =================================================================== +// Rust-101, Part 15: Mutex, Interior Mutability (cont.), Sync +// =========================================================== + +use std::sync::{Arc, Mutex}; +use std::thread; + +//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only* +//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, +//@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever +//@ someone changes the data, nobody else is looking at it. In other words, we need a *critical section* or (as it +//@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. +//@ +//@ As an example, let us write a concurrent counter. 
As usual in Rust, we first have to think about our data layout. +//@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, +//@ a `Mutex` protects data, not code - and it is impossible to access the data in any other way. This is generally considered +//@ good style, but other languages typically lack the ability to actually enforce this. +//@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. +//@ +//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar +//@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing +//@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc<Mutex<usize>>`. However, +//@ it is a locally declared type, so we can give it an inherent implementation and implement traits for it. Since the +//@ field is private, nobody outside this module can even know the type we are wrapping. + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor just wraps the constructors of `Arc` and `Mutex`. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(Mutex::new(val))) /*@*/ + } + + //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? + //@ How can this be, since `increment` definitely modifies the counter? We already discussed above that `Mutex` is + //@ a way to get around this restriction in Rust. This phenomenon of data that can be mutated through a shared + //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, + //@ this does not count as "mutation".
This stands in contrast to *exterior mutability*, which is the kind of + //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar + //@ with languages like ML, you can compare this to how something of type `ref` permits mutation, even though it is + //@ itself a functional value (more precisely, a location) like all the others. + //@ + //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. + //@ The reason that this still works is careful programming of the primitives for interior mutability - in this case, that's + //@ `Mutex`. It has to ensure with dynamic checks, at run-time, that things don't fall apart. In particular, it has to ensure + //@ that the data covered by the mutex can only ever be accessed from inside a critical section. This is where Rust's type + //@ system comes into play: With its discipline of ownership and borrowing, it can enforce such rules. Let's see how this goes. + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. + let mut counter = self.0.lock().unwrap(); + //@ The guard is another example of a smart pointer, and it can be used as if it were a pointer to the data protected + //@ by the lock. + *counter = *counter + by; + //@ At the end of the function, `counter` is dropped and the mutex is available again. + //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us + //@ to borrow some of its content, release the lock of the mutex, and subsequently access the protected data without holding + //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry + //@ about data races *even though* we are mutating shared memory! 
+ //@ + //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the + //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will fail. + //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned + //@ state and attempt to recover from it. + } + + // The function `get` returns the current value of the counter. + pub fn get(&self) -> usize { + let counter = self.0.lock().unwrap(); /*@*/ + *counter /*@*/ + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. 
+// +// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// `by` *only if* the current value is `test`. + +//@ ## Sync +//@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* +//@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is `Send`. +//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) +//@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like +//@ sending full ownership, in the sense that it can be used to move the object to another thread. +//@ +//@ But what about `&T`, a shared borrow? Without interior mutability, it would always be all right to send such values. +//@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the +//@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for +//@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation +//@ and is thus automatically implemented for a data-structure *if* all its members implement it. +//@ +//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared borrow is good enough +//@ for cloning, and we don't want other threads to clone our local `Rc`, so it must not be `Sync`. The role of `Mutex<T>` +//@ is to enforce synchronization, so it should not be entirely surprising that `Mutex<T>` is `Send` *and* `Sync` provided that +//@ `T` is `Send`. +//@ +//@ In the next part, we will learn about a type called `RefCell` that is `Send`, but not `Sync`. +//@ +//@ You may be curious whether there is a type that's `Sync`, but not `Send`.
There are indeed rather esoteric examples +//@ of such types, but that's not a topic I want to go into. In case you are curious, there's a +//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. +//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. + +// FIXME TODO some old outdated explanation FIXME TODO //@ [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html) //@ [`is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. @@ -22,4 +162,5 @@ //@ the thread-safe `RwLock`. And finally, in case a distinction between readers and writers is not helpful, one can use the //@ more efficient `Mutex`. + //@ [index](main.html) | [previous](part14.html) | [next](main.html) diff --git a/workspace/src/part11.rs b/workspace/src/part11.rs index cc2a252..ac19371 100644 --- a/workspace/src/part11.rs +++ b/workspace/src/part11.rs @@ -1,97 +1,65 @@ -// Rust-101, Part 11: Trait Objects, Box, Rc, Lifetime bounds -// ========================================================== +// Rust-101, Part 11: Trait Objects, Box, Lifetime bounds +// ====================================================== -mod callbacks { - // For now, we just decide that the callbacks have an argument of type `i32`. - struct CallbacksV1 { - callbacks: Vec, - } - - /* struct CallbacksV2 { - callbacks: Vec, - } */ - pub struct Callbacks { - callbacks: Vec>, - } +// For now, we just decide that the callbacks have an argument of type `i32`. +struct CallbacksV1 { + callbacks: Vec, +} - impl Callbacks { - // Now we can provide some functions. The constructor should be straight-forward. - pub fn new() -> Self { - unimplemented!() - } +/* struct CallbacksV2 { + callbacks: Vec, +} */ - // Registration simply stores the callback. 
- pub fn register(&mut self, callback: Box) { - unimplemented!() - } +pub struct Callbacks { + callbacks: Vec>, +} - // And here we call all the stored callbacks. - pub fn call(&mut self, val: i32) { - // Since they are of type `FnMut`, we need to mutably iterate. Notice that boxes dereference implicitly. - for callback in self.callbacks.iter_mut() { - unimplemented!() - } - } +impl Callbacks { + // Now we can provide some functions. The constructor should be straight-forward. + pub fn new() -> Self { + unimplemented!() } - // Now we are ready for the demo. - pub fn demo(c: &mut Callbacks) { - c.register(Box::new(|val| println!("Callback 1: {}", val))); - c.call(0); - - let mut count: usize = 0; - c.register(Box::new(move |val| { - count = count+1; - println!("Callback 2, {}. time: {}", count, val); - } )); - c.call(1); c.call(2); + // Registration simply stores the callback. + pub fn register(&mut self, callback: Box) { + unimplemented!() } -} -// Remember to edit `main.rs` to run the demo. -pub fn main() { - let mut c = callbacks::Callbacks::new(); - callbacks::demo(&mut c); -} - -mod callbacks_clone { - - use std::rc::Rc; - - #[derive(Clone)] - pub struct Callbacks { - callbacks: Vec>, + // We can also write a generic version of `register`, such that it will be instantiated with some concrete closure type `F` + // and do the creation of the `Box` and the conversion from `F` to `FnMut(i32)` itself. + + pub fn register_generic(&mut self, callback: F) { + unimplemented!() } - impl Callbacks { - pub fn new() -> Self { - unimplemented!() - } - - // For the `register` function, we don't actually have to use trait objects in the argument. - - pub fn register(&mut self, callback: F) { + // And here we call all the stored callbacks. + pub fn call(&mut self, val: i32) { + // Since they are of type `FnMut`, we need to mutably iterate. 
+ for callback in self.callbacks.iter_mut() { unimplemented!() } - - pub fn call(&mut self, val: i32) { - // We only need a shared iterator here. `Rc` also implicitly dereferences, so we can simply call the callback. - for callback in self.callbacks.iter() { - unimplemented!() - } - } } +} + +// Now we are ready for the demo. Remember to edit `main.rs` to run it. +pub fn main() { + let mut c = Callbacks::new(); + c.register(Box::new(|val| println!("Callback 1: {}", val))); + c.call(0); - // The demo works just as above. Our counting callback doesn't work anymore though, because we are using `Fn` now. - fn demo(c: &mut Callbacks) { - c.register(|val| println!("Callback 1: {}", val)); - c.call(0); c.call(1); + { + let mut count: usize = 0; + c.register_generic(move |val| { + count = count+1; + println!("Callback 2: {} ({}. time)", val, count); + } ); } + c.call(1); c.call(2); } + // **Exercise 11.1**: We made the arbitrary choice of using `i32` for the arguments. Generalize the data-structures above // to work with an arbitrary type `T` that's passed to the callbacks. Since you need to call multiple callbacks with the // same `t: T`, you will either have to restrict `T` to `Copy` types, or pass a borrow. - diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index 4996ac1..a351995 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -1,128 +1,110 @@ -// Rust-101, Part 12: Concurrency, Arc, Send -// ========================================= - -use std::io::prelude::*; -use std::{io, fs, thread}; -use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; -use std::sync::Arc; - - -// Before we come to the actual code, we define a data-structure `Options` to store all the information we need -// to complete the job: Which files to work on, which pattern to look for, and how to output.
-#[derive(Clone,Copy)] -pub enum OutputMode { - Print, - SortAndPrint, - Count, -} -use self::OutputMode::*; +// Rust-101, Part 12: Rc, Interior Mutability, Cell, RefCell +// ========================================================= -pub struct Options { - pub files: Vec, - pub pattern: String, - pub output_mode: OutputMode, -} +use std::rc::Rc; +use std::cell::{Cell, RefCell}; -// The first function reads the files, and sends every line over the `out_channel`. -fn read_files(options: Arc, out_channel: SyncSender) { - for file in options.files.iter() { - // First, we open the file, ignoring any errors. - let file = fs::File::open(file).unwrap(); - // Then we obtain a `BufReader` for it, which provides the `lines` function. - let file = io::BufReader::new(file); - for line in file.lines() { - let line = line.unwrap(); - // Now we send the line over the channel, ignoring the possibility of `send` failing. - out_channel.send(line).unwrap(); - } - } - // When we drop the `out_channel`, it will be closed, which the other end can notice. + +#[derive(Clone)] +struct Callbacks { + callbacks: Vec>, } -// The second function filters the lines it receives through `in_channel` with the pattern, and sends -// matches via `out_channel`. -fn filter_lines(options: Arc, - in_channel: Receiver, - out_channel: SyncSender) { - // We can simply iterate over the channel, which will stop when the channel is closed. - for line in in_channel.iter() { - // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. This is another example of Rust using traits as substitute for overloading. - if line.contains(&options.pattern) { - unimplemented!() - } +impl Callbacks { + pub fn new() -> Self { + unimplemented!() } -} -// The third function performs the output operations, receiving the relevant lines on its `in_channel`. 
-fn output_lines(options: Arc, in_channel: Receiver) { - match options.output_mode { - Print => { - // Here, we just print every line we see. - for line in in_channel.iter() { - unimplemented!() - } - }, - Count => { - // We are supposed to count the number of matching lines. There's a convenient iterator adapter that - // we can use for this job. - unimplemented!() - }, - SortAndPrint => { - // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... - let mut data: Vec = in_channel.iter().collect(); - // ...and implement the actual sorting later. + // Registration works just like last time, except that we are creating an `Rc` now. + pub fn register(&mut self, callback: F) { + unimplemented!() + } + + pub fn call(&self, val: i32) { + // We only need a shared iterator here. Since `Rc` is a smart pointer, we can directly call the callback. + for callback in self.callbacks.iter() { unimplemented!() } } } -// With the operations of the three threads defined, we can now implement a function that performs grepping according -// to some given options. -pub fn run(options: Options) { - // We move the `options` into an `Arc`, as that's what the thread workers expect. - let options = Arc::new(options); - - // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. - let (line_sender, line_receiver) = sync_channel(16); - let (filtered_sender, filtered_receiver) = sync_channel(16); - - // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. - let options1 = options.clone(); - let handle1 = thread::spawn(move || read_files(options1, line_sender)); - - // Same with the filter thread. - let options2 = options.clone(); - let handle2 = thread::spawn(move || { - filter_lines(options2, line_receiver, filtered_sender) - }); - - // And the output thread. 
- let options3 = options.clone(); - let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); - - // Finally, wait until all three threads did their job. - handle1.join().unwrap(); - handle2.join().unwrap(); - handle3.join().unwrap(); +// Time for a demo! +fn demo(c: &mut Callbacks) { + c.register(|val| println!("Callback 1: {}", val)); + c.call(0); c.clone().call(1); } -// Now we have all the pieces together for testing our rgrep with some hard-coded options. pub fn main() { - let options = Options { - files: vec!["src/part10.rs".to_string(), - "src/part11.rs".to_string(), - "src/part12.rs".to_string()], - pattern: "let".to_string(), - output_mode: Print - }; - run(options); + let mut c = Callbacks::new(); + demo(&mut c); +} + +// ## Interior Mutability + +// So, let us put our counter in a `Cell`, and replicate the example from the previous part. +fn demo_cell(c: &mut Callbacks) { + { + let count = Cell::new(0); + // Again, we have to move ownership if the `count` into the environment closure. + c.register(move |val| { + // In here, all we have is a shared borrow of our environment. But that's good enough for the `get` and `set` of the cell! + let new_count = count.get()+1; + count.set(new_count); + println!("Callback 2: {} ({}. time)", val, new_count); + } ); + } + + c.call(2); c.clone().call(3); } -// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file -// and the number of the line in the file. You will have to change the type of the channels from `String` to something -// that records this extra information. +// ## `RefCell` + +// Our final version of `Callbacks` puts the closure environment into a `RefCell`. 
+#[derive(Clone)] +struct CallbacksMut { + callbacks: Vec>>, +} + +impl CallbacksMut { + pub fn new() -> Self { + unimplemented!() + } + + pub fn register(&mut self, callback: F) { + let cell = Rc::new(RefCell::new(callback)); + unimplemented!() + } + + pub fn call(&mut self, val: i32) { + for callback in self.callbacks.iter() { + // We have to *explicitly* borrow the contents of a `RefCell` by calling `borrow` or `borrow_mut`. + let mut closure = callback.borrow_mut(); + // Unfortunately, Rust's auto-dereference of pointers is not clever enough here. We thus have to explicitly + // dereference the smart pointer and obtain a mutable borrow of the content. + (&mut *closure)(val); + } + } +} + +// Now we can repeat the demo from the previous part - but this time, our `CallbacksMut` type +// can be cloned. +fn demo_mut(c: &mut CallbacksMut) { + c.register(|val| println!("Callback 1: {}", val)); + c.call(0); + + { + let mut count: usize = 0; + c.register(move |val| { + count = count+1; + println!("Callback 2: {} ({}. time)", val, count); + } ); + } + c.call(1); c.clone().call(2); +} +// **Exercise 12.1**: Change the type of `call` to ask only for a shared borrow. Then write some piece of code using only the available, public +// interface of `CallbacksMut` such that a reentrant call to `call` is happening, and the program aborts because the `RefCell` refuses to hand +// out a second mutable borrow to its content. 
diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 2be4353..501fb7d 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -1,107 +1,128 @@ -// Rust-101, Part 13: Slices, Arrays, External Dependencies -// ======================================================== - +// Rust-101, Part 13: Concurrency, Arc, Send +// ========================================= + +use std::io::prelude::*; +use std::{io, fs, thread}; +use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; +use std::sync::Arc; + + +// Before we come to the actual code, we define a data-structure `Options` to store all the information we need +// to complete the job: Which files to work on, which pattern to look for, and how to output.
+#[derive(Clone,Copy)] +pub enum OutputMode { + Print, + SortAndPrint, + Count, +} +use self::OutputMode::*; -// ## Slices +pub struct Options { + pub files: Vec, + pub pattern: String, + pub output_mode: OutputMode, +} -pub fn sort(data: &mut [T]) { - if data.len() < 2 { return; } - // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, - // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. - let mut lpos = 1; - let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; - [rpos,len) is >= pivot; lpos < rpos */ - loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a - // test function for `sort`. - unimplemented!() +// The first function reads the files, and sends every line over the `out_channel`. +fn read_files(options: Arc, out_channel: SyncSender) { + for file in options.files.iter() { + // First, we open the file, ignoring any errors. + let file = fs::File::open(file).unwrap(); + // Then we obtain a `BufReader` for it, which provides the `lines` function. + let file = io::BufReader::new(file); + for line in file.lines() { + let line = line.unwrap(); + // Now we send the line over the channel, ignoring the possibility of `send` failing. + out_channel.send(line).unwrap(); + } } - - // Once our cursors met, we need to put the pivot in the right place. - data.swap(0, lpos-1); - - // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: - let (part1, part2) = data.split_at_mut(lpos); - unimplemented!() + // When we drop the `out_channel`, it will be closed, which the other end can notice. } -// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part -// to call the sort function above. 
If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line -// only, not by filename or line number! +// The second function filters the lines it receives through `in_channel` with the pattern, and sends +// matches via `out_channel`. +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { + // We can simply iterate over the channel, which will stop when the channel is closed. + for line in in_channel.iter() { + // `contains` works on lots of types of patterns, but in particular, we can use it to test whether + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. + if line.contains(&options.pattern) { + unimplemented!() + } + } +} -// Now, we can sort, e.g., an vector of numbers. -fn sort_nums(data: &mut Vec) { - sort(&mut data[..]); +// The third function performs the output operations, receiving the relevant lines on its `in_channel`. +fn output_lines(options: Arc, in_channel: Receiver) { + match options.output_mode { + Print => { + // Here, we just print every line we see. + for line in in_channel.iter() { + unimplemented!() + } + }, + Count => { + // We are supposed to count the number of matching lines. There's a convenient iterator adapter that + // we can use for this job. + unimplemented!() + }, + SortAndPrint => { + // We are asked to sort the matching lines before printing. So let's collect them all in a local vector... + let mut data: Vec = in_channel.iter().collect(); + // ...and implement the actual sorting later. + unimplemented!() + } + } } -// ## Arrays -fn sort_array() { - let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut array_of_data); +// With the operations of the three threads defined, we can now implement a function that performs grepping according +// to some given options. +pub fn run(options: Options) { + // We move the `options` into an `Arc`, as that's what the thread workers expect. 
+ let options = Arc::new(options); + + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + let (line_sender, line_receiver) = sync_channel(16); + let (filtered_sender, filtered_receiver) = sync_channel(16); + + // Spawn the read thread: `thread::spawn` takes a closure that is run in a new thread. + let options1 = options.clone(); + let handle1 = thread::spawn(move || read_files(options1, line_sender)); + + // Same with the filter thread. + let options2 = options.clone(); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); + + // And the output thread. + let options3 = options.clone(); + let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); + + // Finally, wait until all three threads did their job. + handle1.join().unwrap(); + handle2.join().unwrap(); + handle3.join().unwrap(); } -// ## External Dependencies - - -// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. -// Remove the attribute of the `rgrep` module to enable compilation. -#[cfg(feature = "disabled")] -pub mod rgrep { - // Now that `docopt` is linked, we can first root it in the namespace and then import it with `use`. We also import some other pieces that we will need. - extern crate docopt; - use self::docopt::Docopt; - use part12::{run, Options, OutputMode}; - use std::process; - - // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. - static USAGE: &'static str = " -Usage: rgrep [-c] [-s] ... - -Options: - -c, --count Count number of matching lines (rather than printing them). - -s, --sort Sort the lines before printing. -"; - - // This function extracts the rgrep options from the command-line arguments. - fn get_options() -> Options { - // Parse `argv` and exit the program with an error message if it fails. 
This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). - let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); - // Now we can get all the values out. - let count = args.get_bool("-c"); - let sort = args.get_bool("-s"); - let pattern = args.get_str(""); - let files = args.get_vec(""); - if count && sort { - println!("Setting both '-c' and '-s' at the same time does not make any sense."); - process::exit(1); - } +// Now we have all the pieces together for testing our rgrep with some hard-coded options. +pub fn main() { + let options = Options { + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], + pattern: "let".to_string(), + output_mode: Print + }; + run(options); +} - // We need to make the strings owned to construct the `Options` instance. - let mode = if count { - OutputMode::Count - } else if sort { - OutputMode::SortAndPrint - } else { - OutputMode::Print - }; - Options { - files: files.iter().map(|file| file.to_string()).collect(), - pattern: pattern.to_string(), - output_mode: mode, - } - } +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file +// and the number of the line in the file. You will have to change the type of the channels from `String` to something +// that records this extra information. - // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. - // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! - pub fn main() { - unimplemented!() - } -} -// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular -// expression, it's called [regex](https://crates.io/crates/regex). 
Add this crate to the dependencies of your workspace, add an option ("-r") to switch -// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. -// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) diff --git a/workspace/src/part14.rs b/workspace/src/part14.rs index d939265..6e007aa 100644 --- a/workspace/src/part14.rs +++ b/workspace/src/part14.rs @@ -1,72 +1,107 @@ -// Rust-101, Part 14: Mutex, Interior Mutability, Sync -// =================================================== +// Rust-101, Part 14: Slices, Arrays, External Dependencies +// ======================================================== -use std::sync::{Arc, Mutex}; -use std::thread; +// ## Slices -// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. -#[derive(Clone)] -struct ConcurrentCounter(Arc>); +pub fn sort(data: &mut [T]) { + if data.len() < 2 { return; } -impl ConcurrentCounter { - // The constructor just wraps the constructors of `Arc` and `Mutex`. - pub fn new(val: usize) -> Self { + // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice, + // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. + let mut lpos = 1; + let mut rpos = data.len(); + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ + loop { + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. unimplemented!() } - pub fn increment(&self, by: usize) { - // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. 
- let mut counter = self.0.lock().unwrap(); - *counter = *counter + by; - } + // Once our cursors met, we need to put the pivot in the right place. + data.swap(0, lpos-1); - // The function `get` returns the current value of the counter. - pub fn get(&self) -> usize { - unimplemented!() - } + // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap: + let (part1, part2) = data.split_at_mut(lpos); + unimplemented!() } -// Now our counter is ready for action. -pub fn main() { - let counter = ConcurrentCounter::new(0); +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// only, not by filename or line number! - // We clone the counter for the first thread, which increments it by 2 every 15ms. - let counter1 = counter.clone(); - let handle1 = thread::spawn(move || { - for _ in 0..10 { - thread::sleep_ms(15); - counter1.increment(2); - } - }); - - // The second thread increments the counter by 3 every 20ms. - let counter2 = counter.clone(); - let handle2 = thread::spawn(move || { - for _ in 0..10 { - thread::sleep_ms(20); - counter2.increment(3); +// Now, we can sort, e.g., an vector of numbers. +fn sort_nums(data: &mut Vec) { + sort(&mut data[..]); +} + +// ## Arrays +fn sort_array() { + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); +} + +// ## External Dependencies + + +// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. +// Remove the attribute of the `rgrep` module to enable compilation. +#[cfg(feature = "disabled")] +pub mod rgrep { + // Now that `docopt` is linked, we can first add it to the namespace and then import shorter names with `use`. We also import some other pieces that we will need. 
+ extern crate docopt; + use self::docopt::Docopt; + use part12::{run, Options, OutputMode}; + use std::process; + + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. + static USAGE: &'static str = " +Usage: rgrep [-c] [-s] ... + +Options: + -c, --count Count number of matching lines (rather than printing them). + -s, --sort Sort the lines before printing. +"; + + // This function extracts the rgrep options from the command-line arguments. + fn get_options() -> Options { + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); + // Now we can get all the values out. + let count = args.get_bool("-c"); + let sort = args.get_bool("-s"); + let pattern = args.get_str(""); + let files = args.get_vec(""); + if count && sort { + println!("Setting both '-c' and '-s' at the same time does not make any sense."); + process::exit(1); } - }); - // Now we watch the threads working on the counter. - for _ in 0..50 { - thread::sleep_ms(5); - println!("Current value: {}", counter.get()); + // We need to make the strings owned to construct the `Options` instance. + let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; + Options { + files: files.iter().map(|file| file.to_string()).collect(), + pattern: pattern.to_string(), + output_mode: mode, + } } - // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. - handle1.join().unwrap(); - handle2.join().unwrap(); - println!("Final value: {}", counter.get()); + // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function. 
+ // You can now use `cargo run -- ` to call your program, and see the argument parser and the threads we wrote previously in action! + pub fn main() { + unimplemented!() + } } -// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which -// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one -// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change -// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -// -// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by -// `by` *only if* the current value is `test`. - +// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch +// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) 
diff --git a/workspace/src/part15.rs b/workspace/src/part15.rs index 4dca116..d006b23 100644 --- a/workspace/src/part15.rs +++ b/workspace/src/part15.rs @@ -1,4 +1,76 @@ -// Rust-101, Part 15: Interior Mutability (cont.), RefCell, Cell, Drop -// =================================================================== +// Rust-101, Part 15: Mutex, Interior Mutability (cont.), Sync +// =========================================================== + +use std::sync::{Arc, Mutex}; +use std::thread; + + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor just wraps the constructors of `Arc` and `Mutex`. + pub fn new(val: usize) -> Self { + unimplemented!() + } + + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + let mut counter = self.0.lock().unwrap(); + *counter = *counter + by; + } + + // The function `get` returns the current value of the counter. + pub fn get(&self) -> usize { + unimplemented!() + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value. 
+ handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. +// +// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// `by` *only if* the current value is `test`. + + +// FIXME TODO some old outdated explanation FIXME TODO + -- 2.30.2 From 5baae0ea037ed642b7fe8975fb3004b29827d5b1 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 18 Jul 2015 20:17:18 +0200 Subject: [PATCH 09/16] tweak parts 11-15 again --- solutions/src/counter.rs | 6 +-- src/main.rs | 2 +- src/part11.rs | 2 +- src/part12.rs | 8 +-- src/part13.rs | 13 ++--- src/part14.rs | 15 +++--- src/part15.rs | 106 ++++++++++++++++----------------------- workspace/src/part11.rs | 2 +- workspace/src/part12.rs | 5 +- workspace/src/part13.rs | 2 +- workspace/src/part14.rs | 13 ++--- workspace/src/part15.rs | 20 ++++---- 12 files changed, 87 insertions(+), 107 deletions(-) diff --git a/solutions/src/counter.rs b/solutions/src/counter.rs index afea9d0..319058e 100644 --- a/solutions/src/counter.rs +++ b/solutions/src/counter.rs @@ -11,19 +11,19 @@ impl ConcurrentCounter { } pub fn increment(&self, by: usize) { - let mut counter = self.0.write().unwrap(); + let mut counter = self.0.write().unwrap_or_else(|e| e.into_inner()); *counter = *counter + by; } pub fn compare_and_inc(&self, test: usize, by: usize) { - let mut counter = self.0.write().unwrap(); + let mut counter = self.0.write().unwrap_or_else(|e| e.into_inner()); if 
*counter == test { *counter += by; } } pub fn get(&self) -> usize { - let counter = self.0.read().unwrap(); + let counter = self.0.read().unwrap_or_else(|e| e.into_inner()); *counter } } diff --git a/src/main.rs b/src/main.rs index 1111443..1317856 100644 --- a/src/main.rs +++ b/src/main.rs @@ -84,7 +84,7 @@ // * [Part 12: Rc, Interior Mutability, Cell, RefCell](part12.html) // * [Part 13: Concurrency, Arc, Send](part13.html) // * [Part 14: Slices, Arrays, External Dependencies](part14.html) -// * [Part 15: Mutex, Interior Mutability (cont.), Sync](part15.html) +// * [Part 15: Mutex, Interior Mutability (cont.), RwLock, Sync](part15.html) // * (to be continued) // #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] diff --git a/src/part11.rs b/src/part11.rs index 5cc1462..9d093e3 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -42,7 +42,7 @@ impl Callbacks { // Registration simply stores the callback. pub fn register(&mut self, callback: Box) { - self.callbacks.push(callback); /*@*/ + self.callbacks.push(callback); } // We can also write a generic version of `register`, such that it will be instantiated with some concrete closure type `F` diff --git a/src/part12.rs b/src/part12.rs index c749865..40d9ad5 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -26,7 +26,7 @@ struct Callbacks { impl Callbacks { pub fn new() -> Self { - Callbacks { callbacks: Vec::new() } /*@*/ + Callbacks { callbacks: Vec::new() } } // Registration works just like last time, except that we are creating an `Rc` now. @@ -37,7 +37,7 @@ impl Callbacks { pub fn call(&self, val: i32) { // We only need a shared iterator here. Since `Rc` is a smart pointer, we can directly call the callback. 
for callback in self.callbacks.iter() { - callback(val); /*@*/ + callback(val); /*@*/ } } } @@ -108,11 +108,11 @@ struct CallbacksMut { impl CallbacksMut { pub fn new() -> Self { - CallbacksMut { callbacks: Vec::new() } /*@*/ + CallbacksMut { callbacks: Vec::new() } } pub fn register(&mut self, callback: F) { - let cell = Rc::new(RefCell::new(callback)); + let cell = Rc::new(RefCell::new(callback)); /*@*/ self.callbacks.push(cell); /*@*/ } diff --git a/src/part13.rs b/src/part13.rs index 76d7154..9f42b82 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -7,10 +7,10 @@ use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; use std::sync::Arc; //@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", -//@ called *rgrep*, and it is going to make use of concurrency: One thread reads the input files, one thread does +//@ called *rgrep*, and it is going to perform three jobs concurrently: One thread reads the input files, one thread does //@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that //@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common -//@ pitfall of concurrent programming: data races. +//@ pitfall of concurrent programming: data races. We will see how that works concretely. // Before we come to the actual code, we define a data-structure `Options` to store all the information we need // to complete the job: Which files to work on, which pattern to look for, and how to output.
@@ -145,7 +145,7 @@ pub fn main() { run(options); } -// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file +// **Exercise 13.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. @@ -163,7 +163,7 @@ pub fn main() { //@ still be around. After it sent the string to the other side, `read_files` has no pointer into the string content //@ anymore, and hence no way to race on the data with someone else. //@ -//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make +//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous parts, to make //@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning //@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use //@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` @@ -179,9 +179,10 @@ pub fn main() { //@ //@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in //@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, -//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send`, can be +//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send` can be //@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, -//@ which is why Rust accepted our code. 
But `Rc` is not `Send`, and for a good reason! +//@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! If we had two `Rc` to the same data, and +//@ sent one of them to another thread, things could go haywire due to the lack of synchronization. //@ //@ Now, `Send` as a trait is fairly special. It has a so-called *default implementation*. This means that *every type* implements //@ `Send`, unless it opts out. Opting out is viral: If your type contains a type that opted out, then you don't have `Send`, either. diff --git a/src/part14.rs b/src/part14.rs index 6550fe5..eb2011a 100644 --- a/src/part14.rs +++ b/src/part14.rs @@ -27,7 +27,7 @@ pub fn sort(data: &mut [T]) { /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // **Exercise 14.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a // test function for `sort`. unimplemented!() } @@ -47,8 +47,8 @@ pub fn sort(data: &mut [T]) { sort(part2); /*@*/ } -// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part -// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// **Exercise 14.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 13.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! // Now, we can sort, e.g., an vector of numbers. @@ -82,7 +82,7 @@ fn sort_array() { //@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file.
//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace -//@ now, and enabled the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, +//@ now, and enable the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it, //@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on. //@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local //@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). @@ -91,7 +91,8 @@ fn sort_array() { // Remove the attribute of the `rgrep` module to enable compilation. #[cfg(feature = "disabled")] pub mod rgrep { - // Now that `docopt` is linked, we can first add it to the namespace and then import shorter names with `use`. We also import some other pieces that we will need. + // Now that `docopt` is linked, we can first add it to the namespace with `extern crate` and then import shorter names with `use`. + // We also import some other pieces that we will need. extern crate docopt; use self::docopt::Docopt; use part12::{run, Options, OutputMode}; @@ -108,7 +109,7 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // This parses `argv` and exit the program with an error message if it fails. The code is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
//@ The function `and_then` takes a closure from `T` to `Result`, and uses it to transform a `Result` to a //@ `Result`. This way, we can chain computations that only happen if the previous one succeeded (and the error //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. @@ -153,7 +154,7 @@ Options: } } -// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// **Exercise 14.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. // (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) diff --git a/src/part15.rs b/src/part15.rs index a783689..9ae5aaf 100644 --- a/src/part15.rs +++ b/src/part15.rs @@ -1,20 +1,23 @@ -// Rust-101, Part 15: Mutex, Interior Mutability (cont.), Sync -// =========================================================== +// Rust-101, Part 15: Mutex, Interior Mutability (cont.), RwLock, Sync +// =================================================================== use std::sync::{Arc, Mutex}; use std::thread; //@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only* -//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, -//@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever -//@ someone changes the data, nobody else is looking at it. 
In other words, we need a *critical section* or (as it -//@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. +//@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. To implement shared-memory +//@ concurrency, we need to have aliasing and mutation - following, of course, some strict rules to make sure +//@ there are no data races. In Rust, shared-memory concurrency is obtained through *interior mutability*, +//@ which we already discussed in a single-threaded context in part 12. //@ -//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout. -//@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, -//@ a `Mutex` protects data, not code - and it is impossible to access the data in any other way. This is generally considered -//@ good style, but other languages typically lack the ability to actually enforce this. -//@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. +//@ The most basic type for interior mutability that supports concurrency is [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). +//@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to specify +//@ the type of the data that's protected by the mutex, and Rust ensures that the data is *only* accessed +//@ through the mutex. In other words, "lock data, not code" is actually enforced by the type system, which +//@ becomes possible because of the discipline of ownership and borrowing. +//@ +//@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout: +//@ That will be `Mutex`. Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`.
//@ //@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar //@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing @@ -32,26 +35,12 @@ impl ConcurrentCounter { ConcurrentCounter(Arc::new(Mutex::new(val))) /*@*/ } - //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? - //@ How can this be, since `increment` definitely modifies the counter? We already discussed above that `Mutex` is - //@ a way to get around this restriction in Rust. This phenomenon of data that can be mutated through a shared - //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, - //@ this does not count as "mutation". This stands in contrast to *exterior mutability*, which is the kind of - //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar - //@ with languages like ML, you can compare this to how something of type `ref` permits mutation, even though it is - //@ itself a functional value (more precisely, a location) like all the others. - //@ - //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. - //@ The reason that this still works is careful programming of the primitives for interior mutability - in this case, that's - //@ `Mutex`. It has to ensure with dynamic checks, at run-time, that things don't fall apart. In particular, it has to ensure - //@ that the data covered by the mutex can only ever be accessed from inside a critical section. This is where Rust's type - //@ system comes into play: With its discipline of ownership and borrowing, it can enforce such rules. Let's see how this goes. + // The core operation is, of course, `increment`. 
pub fn increment(&self, by: usize) { - // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. - //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. + // `lock` on a mutex returns a guard, very much like `RefCell`. The guard gives access to the data contained in the mutex. + //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. let mut counter = self.0.lock().unwrap(); - //@ The guard is another example of a smart pointer, and it can be used as if it were a pointer to the data protected - //@ by the lock. + //@ The guard is a smart pointer to the content. *counter = *counter + by; //@ At the end of the function, `counter` is dropped and the mutex is available again. //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us @@ -106,16 +95,28 @@ pub fn main() { println!("Final value: {}", counter.get()); } -// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which -// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one -// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change -// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -// -// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// **Exercise 15.1**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by // `by` *only if* the current value is `test`. +// +// **Exercise 15.2**: Rather than panicking in case the lock is poisoned, we can use `into_inner` on the error to recover +// the data inside the lock. Change the code above to do that.
Try using `unwrap_or_else` for this job. + +//@ ## `RwLock` +//@ Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +//@ provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +//@ for exclusive write access. Notice that this is the same pattern we already saw with shared vs. mutable borrows. Hence +//@ another way of explaining `RwLock` is to say that it is like `RefCell`, but works even for concurrent access. Rather than +//@ panicking when the data is already borrowed, `RwLock` will of course block the current thread until the lock is available. +//@ In this view, `Mutex` is a stripped-down version of `RwLock` that does not distinguish readers and writers. + +// **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. //@ ## Sync -//@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* +//@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` is not prepared for +//@ multiple threads trying to access the data at the same time. How does Rust make sure that we don't accidentally use +//@ `RefCell` across multiple threads? +//@ +//@ In part 13, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* //@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. //@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) //@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like @@ -124,43 +125,22 @@ pub fn main() { //@ But what about `&T`, a shared borrow? 
Without interior mutability, it would always be all-right to send such values. //@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the //@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for -//@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation +//@ this purpose: `Sync`. A type `T` is `Sync` if and only if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation //@ and is thus automatically implemented for a data-structure *if* all its members implement it. //@ +//@ Since `Arc` provides multiple threads with a shared borrow of its content, `Arc` is only `Send` if `T` is `Sync`. +//@ So if we had used `RefCell` above, which is *not* `Sync`, Rust would have caught that mistake. Notice however that +//@ `RefCell` *is* `Send`: If ownership of the entire cell is moved to another thread, it is still not possible for several +//@ threads to try to access the data at the same time. +//@ //@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared borrow is good enough //@ for cloning, and we don't want other threads to clone our local `Rc`, so it must not be `Sync`. The rule of `Mutex` //@ is to enforce synchronization, so it should not be entirely surprising that `Mutex` is `Send` *and* `Sync` provided that //@ `T` is `Send`. //@ -//@ In the next part, we will learn about a type called `RefCell` that is `Send`, but not `Sync`. -//@ //@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples //@ of such types, but that's not a topic I want to go into. In case you are curious, there's a //@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. 
//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. -// FIXME TODO some old outdated explanation FIXME TODO - -//@ [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html) -//@ [`is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. -//@ One can also see it as a dynamically checked version of Rust's usual borrowing rules. You have to explicitly say -//@ when you want to borrow the data in there shared, or mutably, and Rust will complain at run-time if you have -//@ a mutable borrow while any other borrow is active. You can then write programs that Rust may otherwise not -//@ accept. Sending a shared borrow to this to another thread is dangerous, as the checks are not performed in -//@ a thread-safe manner. However, sending the *entire* `RefCell` is okay, because there's only ever one owner, and all -//@ we need to ensure is that everybody attempting to borrow is in the same thread as the owner.
-//@ [`Cell`](http://doc.rust-lang.org/beta/std/cell/struct.Cell.html) is like a stripped-down version of `RefCell`: It doesn't allow -//@ you to borrow its content. Instead, it has a methods `get` and `set` to change the value stored in the cell, and to copy it out. -//@ For obvious reasons, this requires `T` to be `Copy`. -//@ -//@ You can also think about all these types coming from the other end: Starting with `Cell`, we have a primitive for -//@ interior mutability that provides `get` and `set`, both just requiring a shared borrow. Think of these functions as -//@ mutating the *content* of the cell, but not the cell itself, the container. (Just like in ML, where assignment to a -//@ `ref` changes the content, not the location.) However, due to the ownership discipline, `Cell` only works for types -//@ that are `Copy`. Hence we also have `RefCell`, which allows working with the data right in the cell, rather than -//@ having to copy it out. `RefCell` uses non-atomic operations for this purpose, so for the multi-threaded setting, there's -//@ the thread-safe `RwLock`. And finally, in case a distinction between readers and writers is not helpful, one can use the -//@ more efficient `Mutex`. - - //@ [index](main.html) | [previous](part14.html) | [next](main.html) diff --git a/workspace/src/part11.rs b/workspace/src/part11.rs index ac19371..746aed9 100644 --- a/workspace/src/part11.rs +++ b/workspace/src/part11.rs @@ -23,7 +23,7 @@ impl Callbacks { // Registration simply stores the callback. 
pub fn register(&mut self, callback: Box) { - unimplemented!() + self.callbacks.push(callback); } // We can also write a generic version of `register`, such that it will be instantiated with some concrete closure type `F` diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index a351995..23db4f6 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -13,7 +13,7 @@ struct Callbacks { impl Callbacks { pub fn new() -> Self { - unimplemented!() + Callbacks { callbacks: Vec::new() } } // Registration works just like last time, except that we are creating an `Rc` now. @@ -69,11 +69,10 @@ struct CallbacksMut { impl CallbacksMut { pub fn new() -> Self { - unimplemented!() + CallbacksMut { callbacks: Vec::new() } } pub fn register(&mut self, callback: F) { - let cell = Rc::new(RefCell::new(callback)); unimplemented!() } diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 501fb7d..ae12cd1 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -120,7 +120,7 @@ pub fn main() { run(options); } -// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file +// **Exercise 13.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. diff --git a/workspace/src/part14.rs b/workspace/src/part14.rs index 6e007aa..fb580f9 100644 --- a/workspace/src/part14.rs +++ b/workspace/src/part14.rs @@ -14,7 +14,7 @@ pub fn sort(data: &mut [T]) { /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // **Exercise 14.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. 
Write a // test function for `sort`. unimplemented!() } @@ -27,8 +27,8 @@ pub fn sort(data: &mut [T]) { unimplemented!() } -// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part -// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line +// **Exercise 14.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// to call the sort function above. If you did exercise 13.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! // Now, we can sort, e.g., an vector of numbers. @@ -49,7 +49,8 @@ fn sort_array() { // Remove the attribute of the `rgrep` module to enable compilation. #[cfg(feature = "disabled")] pub mod rgrep { - // Now that `docopt` is linked, we can first add it to the namespace and then import shorter names with `use`. We also import some other pieces that we will need. + // Now that `docopt` is linked, we can first add it to the namespace with `extern crate` and then import shorter names with `use`. + // We also import some other pieces that we will need. extern crate docopt; use self::docopt::Docopt; use part12::{run, Options, OutputMode}; @@ -66,7 +67,7 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // This parses `argv` and exit the program with an error message if it fails. The code is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out. let count = args.get_bool("-c"); @@ -100,7 +101,7 @@ Options: } } -// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular +// **Exercise 14.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. // (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) diff --git a/workspace/src/part15.rs b/workspace/src/part15.rs index d006b23..1a6873e 100644 --- a/workspace/src/part15.rs +++ b/workspace/src/part15.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 15: Mutex, Interior Mutability (cont.), Sync -// =========================================================== +// Rust-101, Part 15: Mutex, Interior Mutability (cont.), RwLock, Sync +// =================================================================== use std::sync::{Arc, Mutex}; use std::thread; @@ -15,8 +15,9 @@ impl ConcurrentCounter { unimplemented!() } + // The core operation is, of course, `increment`. pub fn increment(&self, by: usize) { - // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + // `lock` on a mutex returns a guard, very much like `RefCell`. The guard gives access to the data contained in the mutex. 
let mut counter = self.0.lock().unwrap(); *counter = *counter + by; } @@ -61,16 +62,13 @@ pub fn main() { println!("Final value: {}", counter.get()); } -// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which -// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one -// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change -// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -// -// **Exercise 14.2**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by +// **Exercise 15.1**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by // `by` *only if* the current value is `test`. +// +// **Exercise 15.2**: Rather than panicking in case the lock is poisoned, we can use `into_inner` on the error to recover +// the data inside the lock. Change the code above to do that. Try using `unwrap_or_else` for this job. -// FIXME TODO some old outdated explanation FIXME TODO - +// **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time.
-- 2.30.2 From a2eeb1b93e8f52b2119fb11d56f5ffc764ac747b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 18 Jul 2015 23:26:18 +0200 Subject: [PATCH 10/16] write part 16 --- solutions/src/list.rs | 204 ++++++++++++++++++++++++++++++++++++++++ solutions/src/main.rs | 3 +- src/main.rs | 1 + src/part12.rs | 1 + src/part15.rs | 3 +- src/part16.rs | 196 ++++++++++++++++++++++++++++++++++++++ workspace/src/main.rs | 1 + workspace/src/part16.rs | 113 ++++++++++++++++++++++ 8 files changed, 520 insertions(+), 2 deletions(-) create mode 100644 solutions/src/list.rs create mode 100644 src/part16.rs create mode 100644 workspace/src/part16.rs diff --git a/solutions/src/list.rs b/solutions/src/list.rs new file mode 100644 index 0000000..180627d --- /dev/null +++ b/solutions/src/list.rs @@ -0,0 +1,204 @@ + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} + +struct Node { + data: T, + next: NodePtr, + prev: NodePtr, +} +type NodePtr = *mut Node; + +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + +impl LinkedList { + pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + pub fn push_back(&mut self, t: T) { + // Create the new node. + let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + self.first = new; + } else { + debug_assert!(!self.first.is_null()); + unsafe { (*self.last).next = new; } + } + // Make this the last node. 
+ self.last = new; + } + + pub fn pop_back(&mut self) -> Option { + if self.last.is_null() { + None + } else { + let last = self.last; + let new_last = unsafe { (*self.last).prev }; + self.last = new_last; + if new_last.is_null() { + // The list is now empty. + self.first = new_last; + } + let last = unsafe { raw_into_box(last) } ; + Some(last.data) + } + } + + pub fn push_front(&mut self, t: T) { + // Create the new node. + let new = Box::new( Node { data: t, next: self.first, prev: ptr::null_mut() } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.first.is_null() { + debug_assert!(self.last.is_null()); + self.last = new; + } + else { + debug_assert!(!self.last.is_null()); + unsafe { (*self.first).prev = new; } + } + // Make this the first node. + self.first = new; + } + + pub fn pop_front(&mut self) -> Option { + if self.first.is_null() { + None + } else { + let first = self.first; + let new_first = unsafe { (*self.first).next }; + self.first = new_first; + if new_first.is_null() { + // The list is now empty. + self.last = new_first; + } + let first = unsafe { raw_into_box(first) } ; + Some(first.data) + } + } + + pub fn for_each(&mut self, mut f: F) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // Iterate over every node, and call `f`. 
+ f(unsafe{ &mut (*cur_ptr).data }); + cur_ptr = unsafe{ (*cur_ptr).next }; + } + } + + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a T>, +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + if self.next.is_null() { + None + } else { + let ret = unsafe{ &mut (*self.next).data }; + self.next = unsafe { (*self.next).next }; + Some(ret) + } + } +} + +impl Drop for LinkedList { + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + use std::cell::Cell; + use super::LinkedList; + + #[test] + fn test_pop_back() { + let mut l: LinkedList = LinkedList::new(); + for i in 0..3 { + l.push_front(-i); + l.push_back(i); + } + + assert_eq!(l.pop_back(), Some(2)); + assert_eq!(l.pop_back(), Some(1)); + assert_eq!(l.pop_back(), Some(0)); + assert_eq!(l.pop_back(), Some(-0)); + assert_eq!(l.pop_back(), Some(-1)); + assert_eq!(l.pop_back(), Some(-2)); + assert_eq!(l.pop_back(), None); + assert_eq!(l.pop_back(), None); + } + + #[test] + fn test_pop_front() { + let mut l: LinkedList = LinkedList::new(); + for i in 0..3 { + l.push_front(-i); + l.push_back(i); + } + + assert_eq!(l.pop_front(), Some(-2)); + assert_eq!(l.pop_front(), Some(-1)); + assert_eq!(l.pop_front(), Some(-0)); + assert_eq!(l.pop_front(), Some(0)); + assert_eq!(l.pop_front(), Some(1)); + assert_eq!(l.pop_front(), Some(2)); + assert_eq!(l.pop_front(), None); + assert_eq!(l.pop_front(), None); + } + + #[derive(Clone)] + struct DropChecker { + count: Rc>, + } + impl Drop for DropChecker { + fn drop(&mut self) { + self.count.set(self.count.get() + 1); + } + } + + #[test] + fn test_drop() { + let count = DropChecker { count: Rc::new(Cell::new(0)) }; + { + let mut l 
= LinkedList::new(); + for _ in 0..10 { + l.push_back(count.clone()); + l.push_front(count.clone()); + } + } + assert_eq!(count.count.get(), 20); + } +} diff --git a/solutions/src/main.rs b/solutions/src/main.rs index 0242f49..daafe08 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -8,8 +8,9 @@ extern crate docopt; pub mod bigint; pub mod vec; pub mod rgrep; -pub mod counter; pub mod callbacks; +pub mod counter; +pub mod list; pub fn main() { rgrep::main(); diff --git a/src/main.rs b/src/main.rs index 1317856..19abe4e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -104,6 +104,7 @@ mod part12; mod part13; mod part14; mod part15; +mod part16; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part12.rs b/src/part12.rs index 40d9ad5..d5b7ce3 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -9,6 +9,7 @@ use std::cell::{Cell, RefCell}; //@ (There's not even an automatic derivation happening for the cases where it would be possible.) //@ This restriction propagates up to `Callbacks` itself. What could we do about this? +//@ ## `Rc` //@ The solution is to find some way of cloning `Callbacks` without cloning the environments. This can be achieved with //@ `Rc`, a *reference-counted* pointer. This is is another example of a smart pointer. You can `clone` an `Rc` as often //@ as you want, that doesn't affect the data it contains. It only creates more references to the same data. Once all the diff --git a/src/part15.rs b/src/part15.rs index 9ae5aaf..ef2564a 100644 --- a/src/part15.rs +++ b/src/part15.rs @@ -10,6 +10,7 @@ use std::thread; //@ there are no data races. In Rust, shared-memory concurrency is obtained through *interior mutability*, //@ which we already discussed in a single-threaded context in part 12. 
//@ +//@ ## `Mutex` //@ The most basic type for interior mutability that supports concurrency is [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). //@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to specify //@ the type of the data that's protected by the mutex, and Rust ensures that the data is *only* accessed @@ -111,7 +112,7 @@ pub fn main() { // **Exercise 15.3**: Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. -//@ ## Sync +//@ ## `Sync` //@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` is not prepared for //@ multiple threads trying to access the data at the same time. How does Rust make sure that we don't accidentally use //@ `RefCell` across multiple threads? diff --git a/src/part16.rs b/src/part16.rs new file mode 100644 index 0000000..a613430 --- /dev/null +++ b/src/part16.rs @@ -0,0 +1,196 @@ +// Rust-101, Part 16: Unsafe, Drop (WIP) +// =============================== + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + +//@ As we saw, the rules Rust imposes can get us pretty far: A surprising amount of programming patterns +//@ can be written within safe Rust, and, more importantly, library code like iterators or threads can make +//@ use of the type system to ensure some level of correctness beyond basic memory safety. +//@ +//@ However, there will still be programs that one cannot write in accordance with the borrow checker. And there +//@ will be cases where it may be possible to satisfy the compiler, but only at the cost of some run-time overhead, +//@ as we saw with `RefCell` - overhead which may not be acceptable. In such a situation, it is possible to +//@ use *unsafe* Rust: That's a part of the language that is *known* to open the gate to invalid pointer access +//@ and all other sorts of memory safety. It is typically disabled, guarded by the keyword `unsafe`. 
Of course, +//@ `unsafe` also means "Here Be Dragons": You are on your own now. Types like `Rc` and `Vec` are implemented +//@ `using unsafe Rust. +//@ +//@ ## Unsafe Code +//@ As an example, let us write a doubly-linked list. Clearly, such a data-structure involves aliasing and mutation: +//@ Every node in the list is pointed to by its left and right neighbor, but still we will want to modify the nodes +//@ (either to change the value at that place, or to insert new nodes). We could now try some clever combination of +//@ `Rc` and `RefCell`, but this would end up being quite annoying - and it would incur some over-head. For a low-level +//@ data-structure like a doubly-linked list, it makes sense to implement an efficient version *once*, that is unsafe +//@ internally, but taht can be used without any risk by safe client code. + +//@ As usually, we start by defining the types. Everything is parameterized by the type `T` of the data stored in the list. +// A node of the list consists of the data, and two node pointers for the predecessor and successor. +struct Node { + next: NodePtr, + prev: NodePtr, + data: T, +} +// A node pointer is a *mutable raw point* to a node. +//@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike borrows, they do not come with +//@ any guarantees: Raw pointers can be null, or they can point to garbage. They don't have a lifetime. +type NodePtr = *mut Node; + +// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type +// will own data of type `T`. +//@ The type `PhantomData` does not actually store anything in memory - it has size zero. However, logically, +//@ Rust will consider a `T` to be present. In this case, Rust knows that data of type `T` may be dropped +//@ whenever a `LinkedList` is dropped. The checks involving destructors are pretty subtle, so it's always +//@ a good idea to provide such extra information. 
In safe Rust, this can all be done automatically, but here, +//@ we just have a `*mut Node`, which Rust does not consider as actually owning the data it points to. +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + +//@ Before we get to the actual linked-list methods, we write two short helper functions converting between +//@ mutable raw pointers, and owned pointers (aka `Box`). Both employ `mem::transmute`, which is Rust's +//@ `reinterpret_cast`: It can convert anything to anything, by just re-interpreting the bytes. Clearly, +//@ that's an unsafe operation. + +//@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function is not generally safe. +//@ The caller will have to ensure that `r` is a valid pointer, and that nobody else has a pointer to this data. +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} +//@ The case is slightly different for `box_into_raw`: Converting a `Box` to a raw pointer is always safe. I just drops some +//@ information. Hence we keep the function itself safe, and use an *unsafe block* within the function. This is an (unchecked) +//@ promise to the Rust compiler, saying that even though the code inside that block *could* go wrong, we actually know that +//@ it will not. +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} + +impl LinkedList { + // A new linked list just contains null pointers. `PhantomData` is how we construct any `PhantomData`. + pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + // Add a new node to the end of the list. + pub fn push_back(&mut self, t: T) { + // Create the new node, and make it a raw pointer. + //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want the + //@ memory that it points to to be deallocated! 
+ let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + // The list is currently empty, so we have to update the head pointer. + self.first = new; /*@*/ + } else { + debug_assert!(!self.first.is_null()); + // We have to update the `next` pointer of the tail node. + //@ Since Rust does not know that a raw pointer actually to anything, dereferencing such a pointer is + //@ an unsafe operation. So this unsafe block promises that the pointer will actually be valid. + unsafe { (*self.last).next = new; } /*@*/ + } + // Make this the last node. + self.last = new; + } + + // **Exercise 16.1**: Add some more operations to `LinkedList`: `pop_back`, `push_front` and `pop_front`. + // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` + // and return `Option`. + + // Of course, we will also want to provide an iterator. + //@ This function just creates an instance of `IterMut`, the iterator type which does the actual work. + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +//@ What does the iterator need to store? Strictly speaking, all it needs is the pointer to the next node +//@ that it is going to visit. However, how do we make sure that this pointer remains valid? We have to +//@ get this right ourselves, as we left the safe realms of borrowing and ownership. Remember that the +//@ key ingredient for iterator safety was to tie the lifetime of the iterator to the lifetime of the +//@ borrow used for `iter_mut`. We will thus give `IterMut` two parameters: A type parameter specifying +//@ the type of the data, and a lifetime parameter specifying for how long the list was borrowed to the +//@ iterator. +//@ +//@ For Rust to accept the type, we have to add two more annotations. 
First of all, we have to ensure that +//@ the data in the list lives at least as long as the iterator: If you drop the `T: 'a`, Rust will tell +//@ you to add it back. And secondly, Rust will complain if `'a` is not actually used in the struct. +//@ It doesn't know what it is supposed to do with that lifetime. So we use `PhantomData` again to tell +//@ it that in terms of ownership, this type actually (mutably) borrows a linked list. This has no +//@ operational effect, but it means that Rust can deduce the intent we had when adding that +//@ seemingly useless lifetime parameter. +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a mut LinkedList>, +} + +//@ When implementing `Iterator` for `IterMut`, the fact that we have the lifetime `'a` around immediately +//@ pays off: We would not even be able to write down the type `Item` without that lifetime. +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + // The actual iteration is straight-forward: Once we reached a null pointer, we are done. + if self.next.is_null() { + None + } else { + // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data + // and update the iterator. + let next = unsafe { &mut *self.next }; + let ret = &mut next.data; + self.next = next.next; + Some(ret) + } + } +} + +//@ In `next` above, we made crucial use of the assumption that `self.next` is either null or a valid pointer. +//@ This only works because if someone tries to delete elements from a list during iteration, we know that the borrow checker +//@ will catch them: If they call `next`, the lifetime `'a` we artificially added to the iterator has to still be +//@ active, which means the mutable borrow passed to `iter_mut` is still active, which means nobody can delete +//@ anything from the list.
In other words, we make use of the expressive type system of Rust, decorating +//@ our own unsafe implementation with just enough information so that Rust can check *uses* of the linked-list. +//@ If the type system were weaker, we could not write a linked-list like the above with a safe interface! + +// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared borrows. +// Add testcases for both kinds of iterators. + +// ## `Drop` +//@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is removed, +//@ nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself has a destructor that needs to +//@ clean up, it is not called for the element remaining in the list. We need to take care of that ourselves. +//@ +//@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a very special trait. +//@ It can only be implemented for *nominal types*, i.e., you cannot implement `Drop` for `&mut T`. You also cannot +//@ restrict the type and lifetime parameters further - the `Drop` implementation has to apply to *all* instances +//@ of `LinkedList`. +impl Drop for LinkedList { + // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then + // the destructor of `self` would be called at the end pf the function, resulting in endless recursion... + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // In the destructor, we just iterate over the entire list, successively obtaining ownership + // (`Box`) of every node. When the box is dropped, it will call the destructor on `data` if + // necessary, and subsequently free the node on the heap. + //@ We call `drop` explicitly here just for documentation purposes. + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + +//@ ## The End +//@ Congratulations! You complete Rust-101. 
This was the last example of the last part. I hope you enjoyed it. +//@ If you have feedback, please head to the [Rust-101](https://www.ralfj.de/projects/rust-101/) website +//@ and let me know how you liked it. The entire course is open-source (under CC-BY-SA 4.0), and contributions are welcome! +//@ +//@ The [index](main.html) contains some more links to additional resources you may find useful. With that, there's +//@ only one thing left to say: Happy Rust Hacking! + +//@ [index](main.html) | [previous](part15.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index a86a65a..77632d9 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -17,6 +17,7 @@ mod part12; mod part13; mod part14; mod part15; +mod part16; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part16.rs b/workspace/src/part16.rs new file mode 100644 index 0000000..a804f8e --- /dev/null +++ b/workspace/src/part16.rs @@ -0,0 +1,113 @@ +// Rust-101, Part 16: Unsafe, Drop (WIP) +// =============================== + +use std::ptr; +use std::mem; +use std::marker::PhantomData; + + +// A node of the list consists of the data, and two node pointers for the predecessor and successor. +struct Node { + next: NodePtr, + prev: NodePtr, + data: T, +} +// A node pointer is a *mutable raw point* to a node. +type NodePtr = *mut Node; + +// The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type +// will own data of type `T`. +pub struct LinkedList { + first: NodePtr, + last: NodePtr, + _marker: PhantomData, +} + + +unsafe fn raw_into_box(r: *mut T) -> Box { + mem::transmute(r) +} +fn box_into_raw(b: Box) -> *mut T { + unsafe { mem::transmute(b) } +} + +impl LinkedList { + // A new linked list just contains null pointers. `PhantomData` is how we construct any `PhantomData`. 
+ pub fn new() -> Self { + LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } + } + + // Add a new node to the end of the list. + pub fn push_back(&mut self, t: T) { + // Create the new node, and make it a raw pointer. + let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); + let new = box_into_raw(new); + // Update other points to this node. + if self.last.is_null() { + debug_assert!(self.first.is_null()); + // The list is currently empty, so we have to update the head pointer. + unimplemented!() + } else { + debug_assert!(!self.first.is_null()); + // We have to update the `next` pointer of the tail node. + unimplemented!() + } + // Make this the last node. + self.last = new; + } + + // **Exercise 16.1**: Add some more operations to `LinkedList`: `pop_back`, `push_front` and `pop_front`. + // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` + // and return `Option`. + + // Of course, we will also want to provide an iterator. + pub fn iter_mut(&self) -> IterMut { + IterMut { next: self.first, _marker: PhantomData } + } +} + +pub struct IterMut<'a, T> where T: 'a { + next: NodePtr, + _marker: PhantomData<&'a mut LinkedList>, +} + +impl<'a, T> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + // The actual iteration is straight-forward: Once we reached a null pointer, we are done. + if self.next.is_null() { + None + } else { + // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data + // and update the iterator. + let next = unsafe { &mut *self.next }; + let ret = &mut next.data; + self.next = next.next; + Some(ret) + } + } +} + + +// **Exercise 16.2**: Add a method `iter` and a type `Iter` providing iteration for shared borrows. +// Add testcases for both kinds of iterators. 
+ +// ## `Drop` +impl Drop for LinkedList { + // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then + // the destructor of `self` would be called at the end pf the function, resulting in endless recursion... + fn drop(&mut self) { + let mut cur_ptr = self.first; + while !cur_ptr.is_null() { + // In the destructor, we just iterate over the entire list, successively obtaining ownership + // (`Box`) of every node. When the box is dropped, it will call the destructor on `data` if + // necessary, and subsequently free the node on the heap. + let cur = unsafe { raw_into_box(cur_ptr) }; + cur_ptr = cur.next; + drop(cur); + } + } +} + + -- 2.30.2 From 1a691352b57b7338388ff568403495ecb44272eb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 19 Jul 2015 12:54:44 +0200 Subject: [PATCH 11/16] finish part 16 --- README.rst | 7 +++++ src/main.rs | 2 +- src/part15.rs | 4 +-- src/part16.rs | 60 ++++++++++++++++++++++------------------- workspace/src/part16.rs | 17 ++++++------ 5 files changed, 51 insertions(+), 39 deletions(-) diff --git a/README.rst b/README.rst index 3d4188f..db3e742 100644 --- a/README.rst +++ b/README.rst @@ -28,3 +28,10 @@ details. .. _git repository: http://www.ralfj.de/git/rust-101.git .. _on GitHub: https://github.com/RalfJung/rust-101 .. _CC-BY-SA 4.0: https://creativecommons.org/licenses/by-sa/4.0/ + +Contact +------- + +If you found a bug, or want to leave a comment, please +`send me a mail `_. 
I'm also happy about pull +requests :) diff --git a/src/main.rs b/src/main.rs index 19abe4e..d0069e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -85,7 +85,7 @@ // * [Part 13: Concurrency, Arc, Send](part13.html) // * [Part 14: Slices, Arrays, External Dependencies](part14.html) // * [Part 15: Mutex, Interior Mutability (cont.), RwLock, Sync](part15.html) -// * (to be continued) +// * [Part 16: Unsafe Rust, Drop](part16.html) // #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] mod part00; diff --git a/src/part15.rs b/src/part15.rs index ef2564a..99eb3be 100644 --- a/src/part15.rs +++ b/src/part15.rs @@ -119,7 +119,7 @@ pub fn main() { //@ //@ In part 13, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* //@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. -//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) +//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/stable/std/mem/fn.swap.html) //@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like //@ sending full ownership, in the sense that it can be used to move the object to another thread. //@ @@ -144,4 +144,4 @@ pub fn main() { //@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. //@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. 
-//@ [index](main.html) | [previous](part14.html) | [next](main.html) +//@ [index](main.html) | [previous](part14.html) | [next](part16.html) diff --git a/src/part16.rs b/src/part16.rs index a613430..fefccdf 100644 --- a/src/part16.rs +++ b/src/part16.rs @@ -1,11 +1,11 @@ -// Rust-101, Part 16: Unsafe, Drop (WIP) -// =============================== +// Rust-101, Part 16: Unsafe Rust, Drop +// ==================================== use std::ptr; use std::mem; use std::marker::PhantomData; -//@ As we saw, the rules Rust imposes can get us pretty far: A surprising amount of programming patterns +//@ As we saw, the rules Rust imposes to ensure memory safety can get us pretty far. A surprising amount of programming patterns //@ can be written within safe Rust, and, more importantly, library code like iterators or threads can make //@ use of the type system to ensure some level of correctness beyond basic memory safety. //@ @@ -14,16 +14,18 @@ use std::marker::PhantomData; //@ as we saw with `RefCell` - overhead which may not be acceptable. In such a situation, it is possible to //@ use *unsafe* Rust: That's a part of the language that is *known* to open the gate to invalid pointer access //@ and all other sorts of memory safety. It is typically disabled, guarded by the keyword `unsafe`. Of course, -//@ `unsafe` also means "Here Be Dragons": You are on your own now. Types like `Rc` and `Vec` are implemented -//@ `using unsafe Rust. +//@ `unsafe` also means "Here Be Dragons": You are on your own now. +//@ +//@ The goal in these cases is to confine unsafety to the local module. Types like `Rc` and `Vec` are implemented +//@ using unsafe Rust, but *using* them as we did is (believed to be) perfectly safe. //@ //@ ## Unsafe Code //@ As an example, let us write a doubly-linked list. 
Clearly, such a data-structure involves aliasing and mutation: //@ Every node in the list is pointed to by its left and right neighbor, but still we will want to modify the nodes -//@ (either to change the value at that place, or to insert new nodes). We could now try some clever combination of +//@ (either to change the value at that place, or to insert/delete nodes). We could now try some clever combination of //@ `Rc` and `RefCell`, but this would end up being quite annoying - and it would incur some over-head. For a low-level //@ data-structure like a doubly-linked list, it makes sense to implement an efficient version *once*, that is unsafe -//@ internally, but taht can be used without any risk by safe client code. +//@ internally, but that can be used without any risk by safe client code. //@ As usually, we start by defining the types. Everything is parameterized by the type `T` of the data stored in the list. // A node of the list consists of the data, and two node pointers for the predecessor and successor. @@ -32,9 +34,9 @@ struct Node { prev: NodePtr, data: T, } -// A node pointer is a *mutable raw point* to a node. +// A node pointer is a *mutable raw pointer* to a node. //@ Raw pointers (`*mut T` and `*const T`) are the Rust equivalent of pointers in C. Unlike borrows, they do not come with -//@ any guarantees: Raw pointers can be null, or they can point to garbage. They don't have a lifetime. +//@ any guarantees: Raw pointers can be null, or they can point to garbage. They don't have a lifetime, either. type NodePtr = *mut Node; // The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type @@ -53,7 +55,9 @@ pub struct LinkedList { //@ Before we get to the actual linked-list methods, we write two short helper functions converting between //@ mutable raw pointers, and owned pointers (aka `Box`). 
Both employ `mem::transmute`, which is Rust's //@ `reinterpret_cast`: It can convert anything to anything, by just re-interpreting the bytes. Clearly, -//@ that's an unsafe operation. +//@ that's an unsafe operation and must only be used with great care. If at all possible, its use should be avoided.
+//@ We are making the assumption here that a `Box` and a raw pointer have the same representation in memory. In the future, +//@ Rust will [provide](http://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.from_raw) such [operations](http://doc.rust-lang.org/beta/alloc/boxed/struct.Box.html#method.into_raw) in the standard library, but the exact API is still being fleshed out. //@ We declare `raw_into_box` to be an `unsafe` function, telling Rust that calling this function is not generally safe. //@ The caller will have to ensure that `r` is a valid pointer, and that nobody else has a pointer to this data. @@ -62,8 +66,7 @@ unsafe fn raw_into_box(r: *mut T) -> Box { } //@ The case is slightly different for `box_into_raw`: Converting a `Box` to a raw pointer is always safe. I just drops some //@ information. Hence we keep the function itself safe, and use an *unsafe block* within the function. This is an (unchecked) -//@ promise to the Rust compiler, saying that even though the code inside that block *could* go wrong, we actually know that -//@ it will not. +//@ promise to the Rust compiler, saying that a safe invocation of `box_into_raw` cannot go wrong. fn box_into_raw(b: Box) -> *mut T { unsafe { mem::transmute(b) } } @@ -74,7 +77,7 @@ impl LinkedList { LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } } - // Add a new node to the end of the list. + // This function adds a new node to the end of the list. pub fn push_back(&mut self, t: T) { // Create the new node, and make it a raw pointer. //@ Calling `box_into_raw` gives up ownership of the box, which is crucial: We don't want the @@ -89,7 +92,7 @@ impl LinkedList { } else { debug_assert!(!self.first.is_null()); // We have to update the `next` pointer of the tail node. 
- //@ Since Rust does not know that a raw pointer actually to anything, dereferencing such a pointer is + //@ Since Rust does not know that a raw pointer actually points to anything, dereferencing such a pointer is //@ an unsafe operation. So this unsafe block promises that the pointer will actually be valid. unsafe { (*self.last).next = new; } /*@*/ } @@ -101,7 +104,7 @@ impl LinkedList { // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` // and return `Option`. - // Of course, we will also want to provide an iterator. + // Next, we are going to provide an iterator. //@ This function just creates an instance of `IterMut`, the iterator type which does the actual work. pub fn iter_mut(&self) -> IterMut { IterMut { next: self.first, _marker: PhantomData } @@ -115,7 +118,7 @@ impl LinkedList { //@ borrow used for `iter_mut`. We will thus give `IterMut` two parameters: A type parameter specifying //@ the type of the data, and a lifetime parameter specifying for how long the list was borrowed to the //@ iterator. -//@ + //@ For Rust to accept the type, we have to add two more annotations. First of all, we have to ensure that //@ the data in the list lives at least as long as the iterator: If you drop the `T: 'a`, Rust will tell //@ you to add it back. And secondly, Rust will complain if `'a` is not actually used in the struct. @@ -136,14 +139,14 @@ impl<'a, T> Iterator for IterMut<'a, T> { fn next(&mut self) -> Option { // The actual iteration is straight-forward: Once we reached a null pointer, we are done. if self.next.is_null() { - None + None } else { // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data // and update the iterator. 
let next = unsafe { &mut *self.next }; let ret = &mut next.data; - self.next = next.next; - Some(ret) + self.next = next.next; /*@*/ + Some(ret) /*@*/ } } } @@ -160,13 +163,13 @@ impl<'a, T> Iterator for IterMut<'a, T> { // Add testcases for both kinds of iterators. // ## `Drop` -//@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is removed, +//@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is dropped, //@ nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself has a destructor that needs to //@ clean up, it is not called for the element remaining in the list. We need to take care of that ourselves. //@ //@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a very special trait. //@ It can only be implemented for *nominal types*, i.e., you cannot implement `Drop` for `&mut T`. You also cannot -//@ restrict the type and lifetime parameters further - the `Drop` implementation has to apply to *all* instances +//@ restrict the type and lifetime parameters further than the type does - the `Drop` implementation has to apply to *all* instances //@ of `LinkedList`. impl Drop for LinkedList { // The destructor itself is a method which takes `self` in mutably borrowed form. It cannot own `self`, because then @@ -185,12 +188,13 @@ impl Drop for LinkedList { } } -//@ ## The End -//@ Congratulations! You complete Rust-101. This was the last example of the last part. I hope you enjoyed it. -//@ If you have feedback, please head to the [Rust-101](https://www.ralfj.de/projects/rust-101/) website -//@ and let me know how you liked it. The entire course is open-source (under CC-BY-SA 4.0), and contributions are welcome! +// ## The End +//@ Congratulations! You completed Rust-101. This was the last part of the course. I hope you enjoyed it. 
+//@ If you have feedback or want to contribute yourself, please head to the [Rust-101](https://www.ralfj.de/projects/rust-101/) website +//@ for further information. The entire course is open-source (under [CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)). //@ -//@ The [index](main.html) contains some more links to additional resources you may find useful. With that, there's -//@ only one thing left to say: Happy Rust Hacking! +//@ If you want to do more, the examples you saw in this course provide lots of playground for coming up with your own little +//@ extensions here and there. The [index](main.html) contains some more links to additional resources you may find useful. +//@ With that, there's only one thing left to say: Happy Rust Hacking! -//@ [index](main.html) | [previous](part15.html) +//@ [index](main.html) | [previous](part15.html) | next diff --git a/workspace/src/part16.rs b/workspace/src/part16.rs index a804f8e..d0b173a 100644 --- a/workspace/src/part16.rs +++ b/workspace/src/part16.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 16: Unsafe, Drop (WIP) -// =============================== +// Rust-101, Part 16: Unsafe Rust, Drop +// ==================================== use std::ptr; use std::mem; @@ -12,7 +12,7 @@ struct Node { prev: NodePtr, data: T, } -// A node pointer is a *mutable raw point* to a node. +// A node pointer is a *mutable raw pointer* to a node. type NodePtr = *mut Node; // The linked list itself stores pointers to the first and the last node. In addition, we tell Rust that this type @@ -37,7 +37,7 @@ impl LinkedList { LinkedList { first: ptr::null_mut(), last: ptr::null_mut(), _marker: PhantomData } } - // Add a new node to the end of the list. + // This function adds a new node to the end of the list. pub fn push_back(&mut self, t: T) { // Create the new node, and make it a raw pointer.
let new = Box::new( Node { data: t, next: ptr::null_mut(), prev: self.last } ); @@ -60,12 +60,13 @@ impl LinkedList { // Add testcases for `push_back` and all of your functions. The `pop` functions should take `&mut self` // and return `Option`. - // Of course, we will also want to provide an iterator. + // Next, we are going to provide an iterator. pub fn iter_mut(&self) -> IterMut { IterMut { next: self.first, _marker: PhantomData } } } + pub struct IterMut<'a, T> where T: 'a { next: NodePtr, _marker: PhantomData<&'a mut LinkedList>, @@ -77,14 +78,13 @@ impl<'a, T> Iterator for IterMut<'a, T> { fn next(&mut self) -> Option { // The actual iteration is straight-forward: Once we reached a null pointer, we are done. if self.next.is_null() { - None + None } else { // Otherwise, we can convert the next pointer to a borrow, get a borrow to the data // and update the iterator. let next = unsafe { &mut *self.next }; let ret = &mut next.data; - self.next = next.next; - Some(ret) + unimplemented!() } } } @@ -110,4 +110,5 @@ impl Drop for LinkedList { } } +// ## The End -- 2.30.2 From 7fdd4203f19f2fa9096d0a662acf22d447e57af1 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 19 Jul 2015 13:00:40 +0200 Subject: [PATCH 12/16] This is no longer work-in-progress --- src/main.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index d0069e6..08a5d25 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,10 @@ // Welcome to Rust-101 // =================== // -// This is [Rust-101](https://www.ralfj.de/projects/rust-101/), a small *work-in-progress* -// tutorial for the [Rust language](http://www.rust-lang.org/). -// It is intended to be an interactive, hands-on course: I believe the only way to -// *really* learn a language is to write code in it, so you should be coding during -// the course. 
+// This is [Rust-101](https://www.ralfj.de/projects/rust-101/), a small tutorial for +// the [Rust language](http://www.rust-lang.org/). It is intended to be an interactive, +// hands-on course: I believe the only way to *really* learn a language is to write code +// in it, so you should be coding during the course. // // If you have any questions that are not answered here, check out the "Additional Resources" // below. In particular, the IRC channel is filled with awesome people willing to help you! I spent @@ -18,7 +17,7 @@ // --------- // // When you got here, I am kind of assuming that you already decided to give Rust at -// least a look, so that I don't have to do much convincing here ;-) . But just in +// least a look, so that I don't have to do much convincing here. But just in // case, here's why I think Rust is worth learning:
// At this time, Rust is a language with a pretty unique set of goals. Rust aims to // achieve C++-style control over memory and execution behavior (like, static vs. dynamic @@ -59,7 +58,7 @@ // -------------- // // Open `your-workspace/src/part00.rs` in your favorite editor, and follow the link below for -// the explanations and exercises. Have fun! +// the explanations and exercises. You are ready to start. Have fun! // // ### Introduction // -- 2.30.2 From 174314786770835cff60100f9ea66aeec59745a4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 19 Jul 2015 13:02:09 +0200 Subject: [PATCH 13/16] Tune part 16 --- src/part16.rs | 2 +- workspace/src/part16.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/part16.rs b/src/part16.rs index fefccdf..5ae6a48 100644 --- a/src/part16.rs +++ b/src/part16.rs @@ -166,7 +166,7 @@ impl<'a, T> Iterator for IterMut<'a, T> { //@ The linked list we wrote is already working quite nicely, but there is one problem: When the list is dropped, //@ nobody bothers to deallocate the remaining nodes. Even worse, if `T` itself has a destructor that needs to //@ clean up, it is not called for the element remaining in the list. We need to take care of that ourselves. -//@ + //@ In Rust, adding a destructor for a type is done by implementing the `Drop` trait. This is a very special trait. //@ It can only be implemented for *nominal types*, i.e., you cannot implement `Drop` for `&mut T`. You also cannot //@ restrict the type and lifetime parameters further than the type does - the `Drop` implementation has to apply to *all* instances diff --git a/workspace/src/part16.rs b/workspace/src/part16.rs index d0b173a..835b8b8 100644 --- a/workspace/src/part16.rs +++ b/workspace/src/part16.rs @@ -94,6 +94,7 @@ impl<'a, T> Iterator for IterMut<'a, T> { // Add testcases for both kinds of iterators. // ## `Drop` + impl Drop for LinkedList { // The destructor itself is a method which takes `self` in mutably borrowed form. 
It cannot own `self`, because then // the destructor of `self` would be called at the end of the function, resulting in endless recursion... -- 2.30.2 From f9c259224010e06892dd3cc91b654965da04e0d2 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 20 Jul 2015 14:01:54 +0200 Subject: [PATCH 14/16] newline fix --- solutions/src/list.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/solutions/src/list.rs b/solutions/src/list.rs index 180627d..5137555 100644 --- a/solutions/src/list.rs +++ b/solutions/src/list.rs @@ -1,4 +1,3 @@ - use std::ptr; use std::mem; use std::marker::PhantomData; -- 2.30.2 From 126f7bfe68883e6465d42c529b00d797b2f8478a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 21 Jul 2015 11:14:55 +0200 Subject: [PATCH 15/16] add link to the advanced programming guide --- src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.rs b/src/main.rs index 08a5d25..02766c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -120,5 +120,6 @@ fn main() { // // * [The Rust Book](https://doc.rust-lang.org/stable/book/) // * [Rust by Example](http://rustbyexample.com/) +// * [The Advanced Rust Programming Guide](http://cglab.ca/~abeinges/blah/turpl/_book/README.html) // * The [Rust Subreddit](https://www.reddit.com/r/rust/) // * For the IRC channel and other forums, see the "Community" section of the [Rust Documentation index](http://doc.rust-lang.org/index.html) -- 2.30.2 From ccf917e1f212cb8f8b07331ec60011f270621dd4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 21 Jul 2015 11:38:10 +0200 Subject: [PATCH 16/16] tune explanation of PhantomData ownership annotation --- src/part16.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/part16.rs b/src/part16.rs index 5ae6a48..eef6f0b 100644 --- a/src/part16.rs +++ b/src/part16.rs @@ -43,9 +43,9 @@ type NodePtr = *mut Node; // will own data of type `T`. //@ The type `PhantomData` does not actually store anything in memory - it has size zero.
However, logically, //@ Rust will consider a `T` to be present. In this case, Rust knows that data of type `T` may be dropped -//@ whenever a `LinkedList<T>` is dropped. The checks involving destructors are pretty subtle, so it's always -//@ a good idea to provide such extra information. In safe Rust, this can all be done automatically, but here, -//@ we just have a `*mut Node<T>`, which Rust does not consider as actually owning the data it points to. +//@ whenever a `LinkedList<T>` is dropped. Dropping has a lot of subtle checks to it, making sure that things can't go +//@ wrong. For this to work, Rust needs to know which types could potentially be dropped. In safe Rust, this can all be inferred +//@ automatically, but here, we just have a `*mut Node<T>`, and we need to tell Rust that we actually own such data. pub struct LinkedList<T> { first: NodePtr<T>, last: NodePtr<T>, -- 2.30.2