From 562558d25054c5be82f11acad0fbe53699de5b1c Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 Jul 2015 12:42:31 +0200 Subject: [PATCH 1/1] finish parts 12, 13 --- TODO.txt | 7 +--- solutions/src/main.rs | 5 +++ solutions/src/rgrep.rs | 2 +- src/main.rs | 2 ++ src/part11.rs | 16 +++++++--- src/part12.rs | 71 +++++++++++++++++++++++------------------ src/part13.rs | 60 +++++++++++++++++++++------------- workspace/src/part11.rs | 5 ++- workspace/src/part12.rs | 25 +++++++++------ workspace/src/part13.rs | 33 ++++++++++++------- 10 files changed, 139 insertions(+), 87 deletions(-) diff --git a/TODO.txt b/TODO.txt index cc3268b..ed3434c 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,8 +1,3 @@ -* Arrays/slices -* Arc, concurrency, channels: Some grep-like thing, "rgrep" -* Send, Sync -* External dependencies: regexp crate, add to rgrep - -* Shared-memoty concurrency, interior mutability: Concurrent counter +* Shared-memoty concurrency, interior mutability, Sync: Concurrent counter * Drop, unsafe: doubly-linked list diff --git a/solutions/src/main.rs b/solutions/src/main.rs index 8afd81c..a0e3f72 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -1,3 +1,8 @@ +// This crate contains solutions to *some* of the exercises, and it bundles +// the projects that span multiple parts together in one file per project. +// It is not always up-to-date with the code in the actual course, and mainly +// serves as draft board for new parts or exercises. + extern crate docopt; pub mod bigint; diff --git a/solutions/src/rgrep.rs b/solutions/src/rgrep.rs index 316e6f0..e64d2fd 100644 --- a/solutions/src/rgrep.rs +++ b/solutions/src/rgrep.rs @@ -134,7 +134,7 @@ fn get_options() -> Options { fn run(options: Options) { let options = Arc::new(options); - // Set up the chain of threads. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the chain of threads. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); diff --git a/src/main.rs b/src/main.rs index 0290eba..0aa0429 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,6 +79,8 @@ // * [Part 09: Iterators](part09.html) // * [Part 10: Closures](part10.html) // * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html) +// * [Part 12: Concurrency, Send](part12.html) +// * [Part 13: Slices, Arrays, External Dependencies](part13.html) // * (to be continued) #![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)] /* extern crate docopt; */ diff --git a/src/part11.rs b/src/part11.rs index 4e5c5fd..caf866a 100644 --- a/src/part11.rs +++ b/src/part11.rs @@ -66,7 +66,10 @@ mod callbacks { //@ variable into the closure. Its environment will then contain a `usize` rather than a `&mut uszie`, and have //@ no effect on this local variable anymore. let mut count: usize = 0; - c.register(Box::new(move |val| { count = count+1; println!("Callback 2, {}. time: {}", count, val); } )); + c.register(Box::new(move |val| { + count = count+1; + println!("Callback 2, {}. time: {}", count, val); + } )); c.call(1); c.call(2); } } @@ -109,9 +112,12 @@ mod callbacks_clone { //@ and do the creation of the `Rc` and the conversion to `Fn(i32)` itself. //@ For this to work, we need to demand that the type `F` does not contain any short-lived borrows. After all, we will store it - //@ in our list of callbacks indefinitely. `'static` is a lifetime, the lifetime of the entire program. We can use lifetimes - //@ as bounds on types, to demand that anything in (an element of) the type lives at least as long as this lifetime. That bound was implicit in the `Box` - //@ above, and it is the reason we could not have the borrowed `count` in the closure in `demo`. + //@ in our list of callbacks indefinitely. If the closure contained a pointer to our caller's stackframe, that pointer + //@ could be invalid by the time the closure is called. We can mitigate this by bounding `F` by a *lifetime*: `T: 'a` says + //@ that all data of type `T` will *outlive* (i.e., will be valid for at least as long as) lifetime `'a`. + //@ Here, we use the special lifetime `'static`, which is the lifetime of the entire program. + //@ The same bound has been implicitly added in the version of `register` above, and in the definition of + //@ `Callbacks`. This is the reason we could not have the borrowed `count` in the closure in `demo` previously. pub fn register(&mut self, callback: F) { self.callbacks.push(Rc::new(callback)); /*@*/ } @@ -147,4 +153,4 @@ mod callbacks_clone { //@ than one version per type it is instantiated with). This makes for smaller code, but you pay the overhead of the virtual function calls. //@ Isn't it beautiful how traits can handle both of these cases (and much more, as we saw, like closures and operator overloading) nicely? -//@ [index](main.html) | [previous](part10.html) | [next](main.html) +//@ [index](main.html) | [previous](part10.html) | [next](part12.html) diff --git a/src/part12.rs b/src/part12.rs index 477a3ae..8a14def 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -1,18 +1,20 @@ -// Rust-101, Part 12: Concurrency (WIP) -// ================= +// Rust-101, Part 12: Concurrency, Send +// ==================================== use std::io::prelude::*; use std::{io, fs, thread}; use std::sync::mpsc::{sync_channel, SyncSender, Receiver}; use std::sync::Arc; -//@ This part is introducing the concurrency features of Rust. We are going to write our own small version of "grep", -//@ called *rgrep*, and it is going to make use of multiple cores: One thread reads the input files, one thread does -//@ the actual matching, and one thread writes the output. +//@ Our next stop are the concurrency features of Rust. We are going to write our own small version of "grep", +//@ called *rgrep*, and it is going to make use of concurrency: One thread reads the input files, one thread does +//@ the actual matching, and one thread writes the output. I already mentioned in the beginning of the course that +//@ Rust's type system (more precisely, the discipline of ownership and borrowing) will help us to avoid a common +//@ pitfall of concurrent programming: data races. // Before we come to the actual code, we define a data-structure `Options` to store all the information we need // to complete the job: Which files to work on, which pattern to look for, and how to output.
-// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. +//@ Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] pub enum OutputMode { Print, @@ -29,19 +31,17 @@ pub struct Options { //@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively. //@ To get the data from one thread to the next, we will use *message passing*: We will establish communication -//@ channels between the threads, with one thread *sending* data, and the other one receiving it. `SyncSender` +//@ channels between the threads, with one thread *sending* data, and the other one *receiving* it. `SyncSender` //@ is the type of the sending end of a synchronous channel transmitting data of type `T`. *Synchronous* here //@ means that the `send` operation could block, waiting for the other side to make progress. We don't want to -//@ end up with the entire files being stored in the buffer of the channels, and the output not being fast enough +//@ end up with the entire file being stored in the buffer of the channels, and the output not being fast enough //@ to keep up with the speed of input. //@ //@ We also need all the threads to have access to the options of the job they are supposed to do. Since it would //@ be rather unnecessary to actually copy these options around, we will use reference-counting to share them between -//@ all threads. `Arc` is the thread-safe version of `Rc, using atomic operations to keep the reference count up-to-date. -//@ You can also think of this as saying that *all* threads own the `Options` "a bit" - and since there could be other -//@ owners, `Arc` (just like `Rc`) only permits read-only access to its content. That's good enough for the options, though. +//@ all threads. `Arc` is the thread-safe version of `Rc`, using atomic operations to keep the reference count up-to-date. -// The first functions reads the files, and sends every line over the `out_channel`. +// The first function reads the files, and sends every line over the `out_channel`. fn read_files(options: Arc, out_channel: SyncSender) { for file in options.files.iter() { // First, we open the file, ignoring any errors. @@ -59,11 +59,13 @@ fn read_files(options: Arc, out_channel: SyncSender) { // The second function filters the lines it receives through `in_channel` with the pattern, and sends // matches via `out_channel`. -fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { // We can simply iterate over the channel, which will stop when the channel is closed. for line in in_channel.iter() { // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. if line.contains(&options.pattern) { out_channel.send(line).unwrap(); /*@*/ } @@ -100,7 +102,7 @@ pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); - // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); @@ -113,7 +115,9 @@ pub fn run(options: Options) { // Same with the filter thread. let options2 = options.clone(); - let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); // And the output thread. let options3 = options.clone(); @@ -129,45 +133,50 @@ pub fn run(options: Options) { //@ We need to call `to_string` on string literals to convert them to a fully-owned `String`. pub fn main() { let options = Options { - files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], pattern: "let".to_string(), output_mode: Print }; run(options); } -// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. //@ ## Ownership, Borrowing, and Concurrency -//@ The little demo above showed that concurrency in Rust has a fairly simple API. However, considering Rust has closures, -//@ that should not be entirely surprising. However, as I mentioned in the beginning, Rust ensures that well-typed programs -//@ do not have data races. How can that be? A data race is typically defined as having two concurrent, unsynchronized +//@ The little demo above showed that concurrency in Rust has a fairly simple API. Considering Rust has closures, +//@ that should not be entirely surprising. However, as it turns out, Rust goes well beyond this and actually ensures +//@ the absence of data races.
+//@ A data race is typically defined as having two concurrent, unsynchronized //@ accesses to the same memory location, at least one of which is a write. In other words, a data race is mutation in //@ the presence of aliasing, which Rust reliably rules out! It turns out that the same mechanism that makes our single-threaded //@ programs memory safe, and that prevents us from invalidating iterators, also helps secure our multi-threaded code against //@ data races. For example, notice how `read_files` sends a `String` to `filter_lines`. At run-time, only the pointer to -//@ the string will actually be moved around (just like when a `String` is passed to a function with full ownership). However, +//@ the character data will actually be moved around (just like when a `String` is passed to a function with full ownership). However, //@ `read_files` has to *give up* ownership of the string to perform `send`, to it is impossible for an outstanding borrow to -//@ still be around. After it sent the string to the other side, `read_files` has no way to race on the data with someone else. +//@ still be around. After it sent the string to the other side, `read_files` has no pointer into the string content +//@ anymore, and hence no way to race on the data with someone else. //@ -//@ However, there is more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make -//@ sure that the callbacks to not reference any pointers that might become invalid? This is just as crucial for spawning +//@ There is a little more to this. Remember the `'static` bound we had to add to `register` in the previous part, to make +//@ sure that the callbacks do not reference any pointers that might become invalid? This is just as crucial for spawning //@ a thread: In general, that thread could last for much longer than the current stack frame. Thus, it must not use //@ any pointers to data in that stack frame. This is achieved by requiring the `FnOnce` closure passed to `thread::spawn` //@ to be valid for lifetime `'static`, as you can see in [its documentation](http://doc.rust-lang.org/stable/std/thread/fn.spawn.html). //@ This avoids another kind of data race, where the thread's access races with the callee deallocating its stack frame. +//@ It is only thanks to the concept of lifetimes that this can be expressed as part of the type of `spawn`. //@ ## Send -//@ However, the story goes further. I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations -//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` above, or the reference count may become invalid. -//@ And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something is wrong. -//@ That's great, of course, but how did it do that? +//@ However, the story goes even further. I said above that `Arc` is a thread-safe version of `Rc`, which uses atomic operations +//@ to manipulate the reference count. It is thus crucial that we don't use `Rc` across multiple threads, or the reference count may +//@ become invalid. And indeed, if you replace `Arc` by `Rc` (and add the appropriate imports), Rust will tell you that something +//@ is wrong. That's great, of course, but how did it do that? //@ //@ The answer is already hinted at in the error: It will say something about `Send`. You may have noticed that the closure in //@ `thread::spawn` does not just have a `'static` bound, but also has to satisfy `Send`. `Send` is a trait, and just like `Copy`, -//@ it's just a marker - there are no functions provided by `Send` What the trait says is that types which are `Send`, can be +//@ it's just a marker - there are no functions provided by `Send`. What the trait says is that types which are `Send`, can be //@ safely sent to another thread without causing trouble. Of course, all the primitive data-types are `Send`. So is `Arc`, //@ which is why Rust accepted our code. But `Rc` is not `Send`, and for a good reason! //@ @@ -176,4 +185,4 @@ pub fn main() { //@ So if the environment of your closure contains an `Rc`, it won't be `Send`, preventing it from causing trouble. If however every //@ captured variable *is* `Send`, then so is the entire environment, and you are good. -//@ [index](main.html) | [previous](part11.html) | [next](main.html) +//@ [index](main.html) | [previous](part11.html) | [next](part13.html) diff --git a/src/part13.rs b/src/part13.rs index 0121079..bd1fca7 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -1,21 +1,21 @@ // Rust-101, Part 13: Slices, Arrays, External Dependencies -// ================= +// ======================================================== //@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options //@ as argument to the program, rather than hard-coding them. Let's start with sorting. // ## Slices //@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to -//@ pass it a `Vec`. Now, sorting does not actually consume the argument, so we could make that a `&mut Vec`. +//@ pass it a `Vec`. Of course, sorting does not actually consume the argument, so we should make that a `&mut Vec`. //@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say, //@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts. //@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the //@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution. -//@ + //@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous -//@ region of memory, where a bunch of `T` are stored. How many` We can't tell! This is an unsized type. Just like for -//@ trait objects, this means we can only operate on pointers to that type, and these pointers will containing the missing -//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split! +//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for +//@ trait objects, this means we can only operate on pointers to that type, and these pointers will carry the missing +//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split. //@ Our function can thus take a borrowed slice, and promise to sort all elements in there. pub fn sort(data: &mut [T]) { if data.len() < 2 { return; } @@ -24,9 +24,11 @@ pub fn sort(data: &mut [T]) { // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. let mut lpos = 1; let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. unimplemented!() } @@ -38,14 +40,14 @@ pub fn sort(data: &mut [T]) { //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does. //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed. let (part1, part2) = data.split_at_mut(lpos); - //@ The index operation can not only be used to address certain elements, it can also be used for "slicing": Giving a range + //@ The index operation can not only be used to address certain elements, it can also be used for *slicing*: Giving a range //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices. sort(&mut part1[..lpos-1]); /*@*/ sort(part2); /*@*/ } -// **Exercise 13.2*: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part // to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! @@ -61,8 +63,8 @@ fn sort_nums(data: &mut Vec) { //@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also //@ borrow them as slices, e.g., to sort them. fn sort_array() { - let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut data); + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); } // ## External Dependencies @@ -86,8 +88,8 @@ fn sort_array() { //@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html). // I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. +// you still have get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the `rgrep` module. #[cfg(feature = "disabled")] pub mod rgrep { // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. @@ -95,7 +97,7 @@ pub mod rgrep { use part12::{run, Options, OutputMode}; use std::process; - // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. static USAGE: &'static str = " Usage: rgrep [-c] [-s] ... @@ -106,7 +108,12 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + //@ The function `and_then` takes a closure from `T` to `Result`, and uses it to transform a `Result` to a + //@ `Result`. This way, we can chain computations that only happen if the previous one succeeded (and the error + //@ type has to stay the same). In case you know about monads, this style of programming will be familiar to you. + //@ There's a similar function for `Option`. `unwrap_or_else` is a bit like `unwrap`, but rather than panicking in + //@ case of an `Err`, it calls the closure. let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out. let count = args.get_bool("-c"); @@ -119,15 +126,23 @@ Options: } // We need to make the strings owned to construct the `Options` instance. - //@ If you check all the type carefully, you will notice that `pattern` above if of type `&str`. `str` is the type of a UTF-8 encoded string, that is, a bunch of - //@ bytes in memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` is a sliced string, and stores the address of the character data, and - //@ their length. String literals like "this one" are of type `&'static str`: They point right to the constant section of the binary, you you cannot claim you - //@ own them. However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling `to_string` will copy the string data - //@ into an owned buffer on the heap, and thus convert it to `String`. + //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 + //@ encoded string, that is, a bunch of bytes in memory (`[u8]`) that are valid according of UTF-8. `str` is unsized. `&str` + //@ stores the address of the character data, and their length. String literals like "this one" are + //@ of type `&'static str`: They point right to the constant section of the binary, so + //@ However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling + //@ `to_string` will copy the string data into an owned buffer on the heap, and thus convert it to `String`. + let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; Options { files: files.iter().map(|file| file.to_string()).collect(), pattern: pattern.to_string(), - output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + output_mode: mode, } } @@ -141,5 +156,6 @@ Options: // **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) //@ [index](main.html) | [previous](part12.html) | [next](main.html) diff --git a/workspace/src/part11.rs b/workspace/src/part11.rs index 7e45540..cc2a252 100644 --- a/workspace/src/part11.rs +++ b/workspace/src/part11.rs @@ -41,7 +41,10 @@ mod callbacks { c.call(0); let mut count: usize = 0; - c.register(Box::new(move |val| { count = count+1; println!("Callback 2, {}. time: {}", count, val); } )); + c.register(Box::new(move |val| { + count = count+1; + println!("Callback 2, {}. time: {}", count, val); + } )); c.call(1); c.call(2); } } diff --git a/workspace/src/part12.rs b/workspace/src/part12.rs index 84d47ec..17e26ff 100644 --- a/workspace/src/part12.rs +++ b/workspace/src/part12.rs @@ -1,5 +1,5 @@ -// Rust-101, Part 12: Concurrency (WIP) -// ================= +// Rust-101, Part 12: Concurrency, Send +// ==================================== use std::io::prelude::*; use std::{io, fs, thread}; @@ -9,7 +9,6 @@ use std::sync::Arc; // Before we come to the actual code, we define a data-structure `Options` to store all the information we need // to complete the job: Which files to work on, which pattern to look for, and how to output.
-// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them. #[derive(Clone,Copy)] pub enum OutputMode { Print, @@ -25,7 +24,7 @@ pub struct Options { } -// The first functions reads the files, and sends every line over the `out_channel`. +// The first function reads the files, and sends every line over the `out_channel`. fn read_files(options: Arc, out_channel: SyncSender) { for file in options.files.iter() { // First, we open the file, ignoring any errors. @@ -43,11 +42,13 @@ fn read_files(options: Arc, out_channel: SyncSender) { // The second function filters the lines it receives through `in_channel` with the pattern, and sends // matches via `out_channel`. -fn filter_lines(options: Arc, in_channel: Receiver, out_channel: SyncSender) { +fn filter_lines(options: Arc, + in_channel: Receiver, + out_channel: SyncSender) { // We can simply iterate over the channel, which will stop when the channel is closed. for line in in_channel.iter() { // `contains` works on lots of types of patterns, but in particular, we can use it to test whether - // one string is contained in another. + // one string is contained in another. This is another example of Rust using traits as substitute for overloading. if line.contains(&options.pattern) { unimplemented!() } @@ -83,7 +84,7 @@ pub fn run(options: Options) { // We move the `options` into an `Arc`, as that's what the thread workers expect. let options = Arc::new(options); - // Set up the channels. Use `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. + // This sets up the channels. We use a `sync_channel` with buffer-size of 16 to avoid needlessly filling RAM. let (line_sender, line_receiver) = sync_channel(16); let (filtered_sender, filtered_receiver) = sync_channel(16); @@ -93,7 +94,9 @@ pub fn run(options: Options) { // Same with the filter thread. let options2 = options.clone(); - let handle2 = thread::spawn(move || filter_lines(options2, line_receiver, filtered_sender)); + let handle2 = thread::spawn(move || { + filter_lines(options2, line_receiver, filtered_sender) + }); // And the output thread. let options3 = options.clone(); @@ -108,14 +111,16 @@ pub fn run(options: Options) { // Now we have all the pieces together for testing our rgrep with some hard-coded options. pub fn main() { let options = Options { - files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()], + files: vec!["src/part10.rs".to_string(), + "src/part11.rs".to_string(), + "src/part12.rs".to_string()], pattern: "let".to_string(), output_mode: Print }; run(options); } -// **Exercise 12.1**: Change rgrep such that it prints now only the matching lines, but also the name of the file +// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file // and the number of the line in the file. You will have to change the type of the channels from `String` to something // that records this extra information. diff --git a/workspace/src/part13.rs b/workspace/src/part13.rs index 3ef7785..311eba5 100644 --- a/workspace/src/part13.rs +++ b/workspace/src/part13.rs @@ -1,8 +1,9 @@ // Rust-101, Part 13: Slices, Arrays, External Dependencies -// ================= +// ======================================================== // ## Slices + pub fn sort(data: &mut [T]) { if data.len() < 2 { return; } @@ -10,9 +11,11 @@ pub fn sort(data: &mut [T]) { // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller. let mut lpos = 1; let mut rpos = data.len(); - /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */ + /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; + [rpos,len) is >= pivot; lpos < rpos */ loop { - // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. + // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements. Write a + // test function for `sort`. unimplemented!() } @@ -24,7 +27,7 @@ pub fn sort(data: &mut [T]) { unimplemented!() } -// **Exercise 13.2*: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part +// **Exercise 13.2**: Since `String` implements `PartialEq`, you can now change the function `output_lines` in the previous part // to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line // only, not by filename or line number! @@ -35,16 +38,16 @@ fn sort_nums(data: &mut Vec) { // ## Arrays fn sort_array() { - let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; - sort(&mut data); + let mut array_of_data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1]; + sort(&mut array_of_data); } // ## External Dependencies // I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it, -// you still have get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be -// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module. +// you still have get the external library into the global namespace. This is done with `extern crate docopt`, and that statement *has* to be +// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the `rgrep` module. #[cfg(feature = "disabled")] pub mod rgrep { // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need. @@ -52,7 +55,7 @@ pub mod rgrep { use part12::{run, Options, OutputMode}; use std::process; - // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse. + // The `USAGE` string documents how the program is to be called. It's written in a format that `docopt` can parse. static USAGE: &'static str = " Usage: rgrep [-c] [-s] ... @@ -63,7 +66,7 @@ Options: // This function extracts the rgrep options from the command-line arguments. fn get_options() -> Options { - // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). + // Parse `argv` and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/). let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit()); // Now we can get all the values out. let count = args.get_bool("-c"); @@ -76,10 +79,17 @@ Options: } // We need to make the strings owned to construct the `Options` instance. + let mode = if count { + OutputMode::Count + } else if sort { + OutputMode::SortAndPrint + } else { + OutputMode::Print + }; Options { files: files.iter().map(|file| file.to_string()).collect(), pattern: pattern.to_string(), - output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print }, + output_mode: mode, } } @@ -93,4 +103,5 @@ Options: // **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular // expression, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch // the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site. +// (You won't be able to use the `regex!` macro if you are on the stable or beta channel of Rust. But it wouldn't help for our use-case anyway.) -- 2.30.2