[root]
name = "rust-101"
version = "0.1.0"
+dependencies = [
+ "docopt 0.6.67 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "docopt"
+version = "0.6.67"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "regex 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strsim 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "libc"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.1.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "rustc-serialize"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "strsim"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
name = "rust-101"
version = "0.1.0"
authors = ["Ralf Jung <post@ralfj.de>"]
+
+[dependencies]
+docopt = "*"
use std::io::prelude::*;
-use std::{io, fs, thread, process};
+use std::{io, fs, thread, process, cmp};
use std::sync::mpsc::{sync_channel, SyncSender, Receiver};
use std::sync::Arc;
line: usize,
}
+/// Equality on `Line` looks at the `data` field only; the other fields do not take part.
+impl PartialEq for Line {
+    fn eq(&self, rhs: &Line) -> bool {
+        self.data == rhs.data
+    }
+}
+/// Ordering on `Line` is delegated entirely to the `data` field, so lines sort by their text.
+impl PartialOrd for Line {
+    fn partial_cmp(&self, rhs: &Line) -> Option<cmp::Ordering> {
+        PartialOrd::partial_cmp(&self.data, &rhs.data)
+    }
+}
+
fn read_files(options: Arc<Options>, out_channel: SyncSender<Line>) {
for (fileidx, file) in options.files.iter().enumerate() {
let file = fs::File::open(file).unwrap();
}
}
+/// Sorts `data` in place with a recursive Quicksort that uses `data[0]` as the pivot.
+///
+/// Only `PartialOrd` is required of `T`. Elements that cannot be compared with the pivot
+/// (for example a floating-point NaN) end up at an unspecified position, but the function
+/// always terminates. For totally ordered input the result is sorted ascending.
+fn sort<T: PartialOrd>(data: &mut [T]) {
+    // Slices with fewer than two elements are already sorted.
+    if data.len() < 2 { return; }
+
+    let mut lpos = 1;
+    let mut rpos = data.len();
+    // Invariant: pivot is data[0]; (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos <= rpos
+    // (for elements that are comparable with the pivot).
+    loop {
+        while lpos < rpos && data[lpos] <= data[0] {
+            lpos += 1;
+        }
+        while rpos > lpos && data[rpos-1] >= data[0] {
+            rpos -= 1;
+        }
+        if rpos == lpos {
+            break;
+        }
+        if lpos + 1 == rpos {
+            // Exactly one element is left and it is neither <= nor >= the pivot, i.e. it is
+            // incomparable. Swapping it with itself would loop forever, so assign it to the
+            // left part and stop.
+            lpos += 1;
+            break;
+        }
+
+        data.swap(lpos, rpos-1);
+        // Both swapped elements are now on their proper side. Stepping past them explicitly
+        // guarantees progress even when the comparisons above cannot make any.
+        lpos += 1;
+        rpos -= 1;
+    }
+
+    data.swap(0, lpos-1); // put pivot in the right place
+
+    // part1 ends with the pivot, which is excluded below, so both recursive calls work on
+    // strictly smaller slices.
+    let (part1, part2) = data.split_at_mut(lpos);
+    sort(&mut part1[..lpos-1]);
+    sort(part2);
+}
+
fn output_lines(options: Arc<Options>, in_channel: Receiver<Line>) {
match options.output_mode {
Print => {
println!("{} hits for {}.", count, options.pattern);
},
SortAndPrint => {
- let _data: Vec<Line> = in_channel.iter().collect();
- unimplemented!()
+ let mut data: Vec<Line> = in_channel.iter().collect();
+ sort(&mut data[..]);
+ for line in data.iter() {
+ println!("{}:{}: {}", options.files[line.file], line.line, line.data);
+ }
}
}
}
// * [Part 10: Closures](part10.html)
// * [Part 11: Trait Objects, Box, Rc, Lifetime bounds](part11.html)
// * (to be continued)
-#![allow(dead_code, unused_imports, unused_variables, unused_mut)]
+#![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)]
+/* extern crate docopt; */
mod part00;
mod part01;
mod part02;
mod part10;
mod part11;
mod part12;
+mod part13;
// To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main`
// function.
-
fn main() {
part00::main();
}
+
// Additional material
// -------------------
//
// to complete the job: Which files to work on, which pattern to look for, and how to output. <br/>
// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them.
#[derive(Clone,Copy)]
-enum OutputMode {
+pub enum OutputMode {
Print,
SortAndPrint,
Count,
}
use self::OutputMode::*;
-struct Options {
- files: Vec<String>,
- pattern: String,
- output_mode: OutputMode,
+pub struct Options {
+ pub files: Vec<String>,
+ pub pattern: String,
+ pub output_mode: OutputMode,
}
//@ Now we can write three functions to do the actual job of reading, matching, and printing, respectively.
},
SortAndPrint => {
// We are asked to sort the matching lines before printing. So let's collect them all in a local vector...
- let data: Vec<String> = in_channel.iter().collect();
+ let mut data: Vec<String> = in_channel.iter().collect();
// ...and implement the actual sorting later.
unimplemented!()
}
// With the operations of the three threads defined, we can now implement a function that performs grepping according
// to some given options.
-fn run(options: Options) {
+pub fn run(options: Options) {
// We move the `options` into an `Arc`, as that's what the thread workers expect.
let options = Arc::new(options);
handle3.join().unwrap();
}
-// Now we have all the pieces together for testing our `rgrep` with some hard-coded options.
+// Now we have all the pieces together for testing our rgrep with some hard-coded options.
//@ We need to call `to_string` on string literals to convert them to a fully-owned `String`.
pub fn main() {
let options = Options {
run(options);
}
-// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file
+// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file
// and the number of the line in the file. You will have to change the type of the channels from `String` to something
// that records this extra information.
--- /dev/null
+// Rust-101, Part 13: Slices, Arrays, External Dependencies
+// =================
+
+//@ To complete rgrep, there are two pieces we still need to implement: Sorting, and taking the job options
+//@ as argument to the program, rather than hard-coding them. Let's start with sorting.
+
+// ## Slices
+//@ Again, we first have to think about the type we want to give to our sorting function. We may be inclined to
+//@ pass it a `Vec<T>`. Now, sorting does not actually consume the argument, so we could make that a `&mut Vec<T>`.
+//@ But there's a problem with that: If we want to implement some divide-and-conquer sorting algorithm (say,
+//@ Quicksort), then we will have to *split* our argument at some point, and operate recursively on the two parts.
+//@ But we can't split a `Vec`! We could now extend the function signature to also take some indices, marking the
+//@ part of the vector we are supposed to sort, but that's all rather clumsy. Rust offers a nicer solution.
+//@
+//@ `[T]` is the type of an (unsized) *array*, with elements of type `T`. All this means is that there's a contiguous
+//@ region of memory, where a bunch of `T` are stored. How many? We can't tell! This is an unsized type. Just like for
+//@ trait objects, this means we can only operate on pointers to that type, and these pointers will contain the missing
+//@ information - namely, the length. Such a pointer is called a *slice*. As we will see, a slice can be split!
+//@ Our function can thus take a borrowed slice, and promise to sort all elements in there.
+pub fn sort<T: PartialOrd>(data: &mut [T]) {
+ if data.len() < 2 { return; } // Slices with fewer than two elements are already sorted.
+
+ // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice,
+ // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller.
+ let mut lpos = 1;
+ let mut rpos = data.len();
+ /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */
+ loop {
+ // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements.
+ unimplemented!()
+ }
+
+ // Once our cursors met, we need to put the pivot in the right place.
+ data.swap(0, lpos-1);
+
+ // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap:
+ //@ They are just a pointer to a start address, and a length. We can thus get two pointers, one at the beginning and
+ //@ one in the middle, and set the lengths appropriately such that they don't overlap. This is what `split_at_mut` does.
+ //@ Since the two slices don't overlap, there is no aliasing and we can have them both mutably borrowed.
+ let (part1, part2) = data.split_at_mut(lpos);
+ //@ The index operation can not only be used to address certain elements, it can also be used for "slicing": Giving a range
+ //@ of indices, and obtaining an appropriate part of the slice we started with. Here, we remove the last element from
+ //@ `part1`, which is the pivot. This makes sure both recursive calls work on strictly smaller slices.
+ sort(&mut part1[..lpos-1]); /*@*/
+ sort(part2); /*@*/
+}
+
+// **Exercise 13.2**: Since `String` implements `PartialOrd`, you can now change the function `output_lines` in the previous part
+// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line
+// only, not by filename or line number!
+
+// Now, we can sort, e.g., a vector of numbers.
+fn sort_nums(data: &mut Vec<i32>) {
+ //@ Vectors support slicing, just like slices do. Here, `..` denotes the full range, which means we want to slice the entire vector.
+ //@ It is then passed to the `sort` function, which doesn't even know that it is working on data inside a vector.
+ sort(&mut data[..]);
+}
+
+// ## Arrays
+//@ An *array* in Rust is given by the type `[T; n]`, where `n` is some *fixed* number. So, `[f64; 10]` is an array of 10 floating-point
+//@ numbers, all one right next to the other in memory. Arrays are sized, and hence can be used like any other type. But we can also
+//@ borrow them as slices, e.g., to sort them.
+fn sort_array() {
+ let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1];
+ sort(&mut data);
+}
+
+// ## External Dependencies
+//@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on
+//@ [`std::env::args`](http://doc.rust-lang.org/beta/std/env/fn.args.html) to gain access to those arguments, and this would become
+//@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from
+//@ other people's work in your program.
+//@
+//@ For sure, we are not the first to equip a Rust program with support for command-line arguments. Someone must have written a library
+//@ for the job, right? Indeed, someone has. Rust has a central repository of published libraries, called [crates.io](https://crates.io/).
+//@ It's a bit like [PyPI](https://pypi.python.org/pypi) or the [Ruby Gems](https://rubygems.org/): Everybody can upload their code,
+//@ and there's tooling for importing that code into your project. This tooling is provided by `cargo`, the tool we are already using to
+//@ build this tutorial. (`cargo` also has support for *publishing* your crate on crates.io, I refer you to [the documentation](http://doc.crates.io/crates-io.html) for more details.)
+//@ In this case, we are going to use the [`docopt` crate](https://crates.io/crates/docopt), which creates a parser for command-line
+//@ arguments based on the usage string. External dependencies are declared in the `Cargo.toml` file.
+
+//@ I already prepared that file, but the declaration of the dependency is still commented out. So please open `Cargo.toml` of your workspace
+//@ now, and enable the two commented-out lines. Then do `cargo build`. Cargo will now download the crate from crates.io, compile it,
+//@ and link it to your program. In the future, you can do `cargo update` to make it download new versions of crates you depend on.
+//@ Note that crates.io is only the default location for dependencies, you can also give it the URL of a git repository or some local
+//@ path. All of this is explained in the [Cargo Guide](http://doc.crates.io/guide.html).
+
+// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it,
+// you still have to get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be
+// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module.
+#[cfg(feature = "disabled")]
+pub mod rgrep {
+ // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need.
+ use docopt::Docopt;
+ use part12::{run, Options, OutputMode};
+ use std::process;
+
+ // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse.
+ static USAGE: &'static str = "
+Usage: rgrep [-c] [-s] <pattern> <file>...
+
+Options:
+ -c, --count Count number of matching lines (rather than printing them).
+ -s, --sort Sort the lines before printing.
+";
+
+ // This function extracts the rgrep options from the command-line arguments.
+ fn get_options() -> Options {
+ // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
+ let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit());
+ // Now we can get all the values out.
+ let count = args.get_bool("-c");
+ let sort = args.get_bool("-s");
+ let pattern = args.get_str("<pattern>");
+ let files = args.get_vec("<file>");
+ if count && sort {
+ println!("Setting both '-c' and '-s' at the same time does not make any sense.");
+ process::exit(1);
+ }
+
+ // We need to make the strings owned to construct the `Options` instance.
+ //@ If you check all the types carefully, you will notice that `pattern` above is of type `&str`. `str` is the type of a UTF-8 encoded string, that is, a bunch of
+ //@ bytes in memory (`[u8]`) that are valid according to UTF-8. `str` is unsized. `&str` is a sliced string, and stores the address of the character data, and
+ //@ its length. String literals like "this one" are of type `&'static str`: They point right to the constant section of the binary, so you cannot claim you
+ //@ own them. However, the borrow is valid for as long as the program runs, hence it has lifetime `'static`. Calling `to_string` will copy the string data
+ //@ into an owned buffer on the heap, and thus convert it to `String`.
+ Options {
+ files: files.iter().map(|file| file.to_string()).collect(),
+ pattern: pattern.to_string(),
+ output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print },
+ }
+ }
+
+ // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function.
+ // You can now use `cargo run -- <pattern> <files>` to call your program, and see the argument parser and the threads we wrote previously in action!
+ pub fn main() {
+ run(get_options());
+ }
+}
+
+// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular
+// expressions, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch
+// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site.
+
+//@ [index](main.html) | [previous](part12.html) | [next](main.html)
[package]
name = "rust-101-workspace"
version = "0.0.0"
+
+#[dependencies]
+#docopt = "*"
-#![allow(dead_code, unused_imports, unused_variables, unused_mut)]
+#![allow(dead_code, unused_imports, unused_variables, unused_mut, unreachable_code)]
// Only the files imported here will be compiled.
mod part00;
mod part10;
mod part11;
mod part12;
+mod part13;
// This decides which part is actually run.
fn main() {
// to complete the job: Which files to work on, which pattern to look for, and how to output. <br/>
// Besides just printing all the matching lines, we will also offer to count them, or alternatively to sort them.
#[derive(Clone,Copy)]
-enum OutputMode {
+pub enum OutputMode {
Print,
SortAndPrint,
Count,
}
use self::OutputMode::*;
-struct Options {
- files: Vec<String>,
- pattern: String,
- output_mode: OutputMode,
+pub struct Options {
+ pub files: Vec<String>,
+ pub pattern: String,
+ pub output_mode: OutputMode,
}
},
SortAndPrint => {
// We are asked to sort the matching lines before printing. So let's collect them all in a local vector...
- let data: Vec<String> = in_channel.iter().collect();
+ let mut data: Vec<String> = in_channel.iter().collect();
// ...and implement the actual sorting later.
unimplemented!()
}
// With the operations of the three threads defined, we can now implement a function that performs grepping according
// to some given options.
-fn run(options: Options) {
+pub fn run(options: Options) {
// We move the `options` into an `Arc`, as that's what the thread workers expect.
let options = Arc::new(options);
handle3.join().unwrap();
}
-// Now we have all the pieces together for testing our `rgrep` with some hard-coded options.
+// Now we have all the pieces together for testing our rgrep with some hard-coded options.
pub fn main() {
let options = Options {
files: vec!["src/part10.rs".to_string(), "src/part11.rs".to_string(), "src/part12.rs".to_string()],
run(options);
}
-// **Exercise 12.1**: Change `rgrep` such that it prints now only the matching lines, but also the name of the file
+// **Exercise 12.1**: Change rgrep such that it prints not only the matching lines, but also the name of the file
// and the number of the line in the file. You will have to change the type of the channels from `String` to something
// that records this extra information.
--- /dev/null
+// Rust-101, Part 13: Slices, Arrays, External Dependencies
+// =================
+
+
+// ## Slices
+pub fn sort<T: PartialOrd>(data: &mut [T]) {
+ if data.len() < 2 { return; } // Slices with fewer than two elements are already sorted.
+
+ // We decide that the element at 0 is our pivot, and then we move our cursors through the rest of the slice,
+ // making sure that everything on the left is no larger than the pivot, and everything on the right is no smaller.
+ let mut lpos = 1;
+ let mut rpos = data.len();
+ /* Invariant: pivot is data[0]; everything with index (0,lpos) is <= pivot; [rpos,len) is >= pivot; lpos < rpos */
+ loop {
+ // **Exercise 13.1**: Complete this Quicksort loop. You can use `swap` on slices to swap two elements.
+ unimplemented!()
+ }
+
+ // Once our cursors met, we need to put the pivot in the right place.
+ data.swap(0, lpos-1);
+
+ // Finally, we split our slice to sort the two halves. The nice part about slices is that splitting them is cheap:
+ let (part1, part2) = data.split_at_mut(lpos);
+ unimplemented!()
+}
+
+// **Exercise 13.2**: Since `String` implements `PartialOrd`, you can now change the function `output_lines` in the previous part
+// to call the sort function above. If you did exercise 12.1, you will have slightly more work. Make sure you sort by the matched line
+// only, not by filename or line number!
+
+// Now, we can sort, e.g., a vector of numbers.
+fn sort_nums(data: &mut Vec<i32>) {
+ sort(&mut data[..]);
+}
+
+// ## Arrays
+fn sort_array() {
+ let mut data: [f64; 5] = [1.0, 3.4, 12.7, -9.12, 0.1];
+ sort(&mut data);
+}
+
+// ## External Dependencies
+
+
+// I disabled the following module (using a rather bad hack), because it only compiles if `docopt` is linked. However, before enabling it,
+// you still have to get the external library into the global namespace. This is done with `extern crate docopt;`, and that statement *has* to be
+// in `main.rs`. So please go there, and enable this commented-out line. Then remove the attribute of the following module.
+#[cfg(feature = "disabled")]
+pub mod rgrep {
+ // Now that `docopt` is linked and declared in `main.rs`, we can import it with `use`. We also import some other pieces that we will need.
+ use docopt::Docopt;
+ use part12::{run, Options, OutputMode};
+ use std::process;
+
+ // The USAGE string documents how the program is to be called. It's written in a format that `docopt` can parse.
+ static USAGE: &'static str = "
+Usage: rgrep [-c] [-s] <pattern> <file>...
+
+Options:
+ -c, --count Count number of matching lines (rather than printing them).
+ -s, --sort Sort the lines before printing.
+";
+
+ // This function extracts the rgrep options from the command-line arguments.
+ fn get_options() -> Options {
+ // Parse argv and exit the program with an error message if it fails. This is taken from the [`docopt` documentation](http://burntsushi.net/rustdoc/docopt/).
+ let args = Docopt::new(USAGE).and_then(|d| d.parse()).unwrap_or_else(|e| e.exit());
+ // Now we can get all the values out.
+ let count = args.get_bool("-c");
+ let sort = args.get_bool("-s");
+ let pattern = args.get_str("<pattern>");
+ let files = args.get_vec("<file>");
+ if count && sort {
+ println!("Setting both '-c' and '-s' at the same time does not make any sense.");
+ process::exit(1);
+ }
+
+ // We need to make the strings owned to construct the `Options` instance.
+ Options {
+ files: files.iter().map(|file| file.to_string()).collect(),
+ pattern: pattern.to_string(),
+ output_mode: if count { OutputMode::Count } else if sort { OutputMode::SortAndPrint } else { OutputMode::Print },
+ }
+ }
+
+ // Finally, we can call the `run` function from the previous part on the options extracted using `get_options`. Edit `main.rs` to call this function.
+ // You can now use `cargo run -- <pattern> <files>` to call your program, and see the argument parser and the threads we wrote previously in action!
+ pub fn main() {
+ run(get_options());
+ }
+}
+
+// **Exercise 13.3**: Wouldn't it be nice if rgrep supported regular expressions? There's already a crate that does all the parsing and matching on regular
+// expressions, it's called [regex](https://crates.io/crates/regex). Add this crate to the dependencies of your workspace, add an option ("-r") to switch
+// the pattern to regular-expression mode, and change `filter_lines` to honor this option. The documentation of regex is available from its crates.io site.
+