From ccf679adb3790903849f7d85b970b67582220952 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 14 Jul 2015 19:56:34 +0200 Subject: [PATCH] Add first version of part 14 --- solutions/src/counter.rs | 56 +++++++++++++++ solutions/src/main.rs | 1 + src/main.rs | 1 + src/part12.rs | 3 + src/part13.rs | 2 +- src/part14.rs | 143 +++++++++++++++++++++++++++++++++++++++ workspace/src/main.rs | 1 + workspace/src/part14.rs | 69 +++++++++++++++++++ 8 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 solutions/src/counter.rs create mode 100644 src/part14.rs create mode 100644 workspace/src/part14.rs diff --git a/solutions/src/counter.rs b/solutions/src/counter.rs new file mode 100644 index 0000000..265fb99 --- /dev/null +++ b/solutions/src/counter.rs @@ -0,0 +1,56 @@ +use std::sync::{Arc, RwLock}; +use std::thread; + +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(RwLock::new(val))) + } + + pub fn increment(&self, by: usize) { + let mut counter = self.0.write().unwrap(); + *counter = *counter + by; + } + + pub fn get(&self) -> usize { + let counter = self.0.read().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} diff --git a/solutions/src/main.rs b/solutions/src/main.rs index a0e3f72..be6e3d5 100644 --- a/solutions/src/main.rs +++ b/solutions/src/main.rs @@ -8,6 +8,7 @@ extern crate docopt; pub mod bigint; pub mod vec; pub mod rgrep; +pub mod counter; pub fn main() { rgrep::main(); diff --git a/src/main.rs b/src/main.rs index 3a42ec0..55a8aa9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -105,6 +105,7 @@ mod part10; mod part11; mod part12; mod part13; +mod part14; // To actually run the code of some part (after filling in the blanks, if necessary), simply edit the `main` // function. diff --git a/src/part12.rs b/src/part12.rs index 3e959f9..dc0da61 100644 --- a/src/part12.rs +++ b/src/part12.rs @@ -124,6 +124,9 @@ pub fn run(options: Options) { let handle3 = thread::spawn(move || output_lines(options3, filtered_receiver)); // Finally, wait until all three threads did their job. + //@ Joining a thread waits for its termination. This can fail if that thread panicked: In this case, we could get + //@ access to the data that it provided to `panic!`. Here, we just assert that they did not panic - so we will panic ourselves + //@ if that happened. handle1.join().unwrap(); handle2.join().unwrap(); handle3.join().unwrap(); diff --git a/src/part13.rs b/src/part13.rs index bd1fca7..811411c 100644 --- a/src/part13.rs +++ b/src/part13.rs @@ -69,7 +69,7 @@ fn sort_array() { // ## External Dependencies //@ This leaves us with just one more piece to complete rgrep: Taking arguments from the command-line. We could now directly work on -//@ [`std::env::args`](http://doc.rust-lang.org/beta/std/env/fn.args.html) to gain access to those arguments, and this would become +//@ [`std::env::args`](http://doc.rust-lang.org/stable/std/env/fn.args.html) to gain access to those arguments, and this would become //@ a pretty boring lesson in string manipulation. Instead, I want to use this opportunity to show how easy it is to benefit from //@ other people's work in your program. //@ diff --git a/src/part14.rs b/src/part14.rs new file mode 100644 index 0000000..32f0fcd --- /dev/null +++ b/src/part14.rs @@ -0,0 +1,143 @@ +// Rust-101, Part 14: Mutex, Sync (WIP) +// ============================== + +use std::sync::{Arc, Mutex}; +use std::thread; + +//@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide everybody +//@ with *read-only* to memory: Since there is aliasing, Rust cannot, in general, permit mutation. If however, +//@ some care would be taken at run-time, then mutation would still be all right: We have to ensure that whenever +//@ someone changes the data, nobody else is working on it. In other words, we need a *critical section* or (as it +//@ is called in Rust) a [`Mutex`](http://doc.rust-lang.org/stable/std/sync/struct.Mutex.html). Some other languages also call this a *lock*. +//@ +//@ As an example, let us write a concurrent counter. As usual, we first have to think about our data-structure in Rust. +//@ In case of the mutex, this means we have to declare the type of the data that we want to be protected. In Rust, +//@ a `Mutex` protects data, not code. This is generally considered good style, but other languages typically lack +//@ the ability to actually enforce this. As we will see, it is impossible to forget to acquire the mutex in Rust. +//@ Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`. +//@ +//@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar +//@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing +//@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc>`. However, +//@ is is a locally declared types, so we can give it an inherent implementation and implement traits for it. Since the +//@ field is private, nobody outside this module can even know the type we are wrapping. + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(Mutex::new(val))) + } + + //@ The core operation is, of course, `increment`. The type may be surprising at first: A shared borrow? + //@ How can this be, since `increment` definitely modifies the counter? We already discussed above that `Mutex` is + //@ a way to get around this restriction in Rust. This phenomenon of data that can be mutated through a shared + //@ borrow is called *interior mutability*: We are changing the inner parts of the object, but seen from the outside, + //@ this does not count as "mutation". This stands in contrast to *exterior mutability*, which is the kind of + //@ mutability we saw so far, where one piece of data is replaced by something else of the same type. If you are familiar + //@ with languages like ML, you can compare this to how something of type `ref` permit mutation, even though it is + //@ itself a functional value (more precisely, a location) like all the others. + //@ + //@ Interior mutability breaks the rules of Rust that I outlined earlier: There is aliasing (a shared borrow) and mutation. + //@ The reason that this still works is careful programming of the primitives for interior mutability - in this case, that's + //@ `Mutex`. It has to ensure with dynamic checks, at run-time, that things don't fall apart. In particular, it has to ensure + //@ that the data covered by the mutex can only ever be accessed from inside a critical section. This is where Rust's type + //@ system comes into play: With its discipline of ownership and borrowing, it can enforce such rules. Let's see how this goes. + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct. + let mut counter = self.0.lock().unwrap(); + *counter = *counter + by; + //@ At the end of the function, `counter` is dropped and the mutex is available again. + //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us + //@ to borrow some of its content, release the lock of the mutex, and subsequently access the protected data without holding + //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry + //@ about data races *even though* we are mutating shared memory! + //@ + //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the + //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will thus fail. + //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned + //@ state and attempt to recover from it. + } + + pub fn get(&self) -> usize { + let counter = self.0.lock().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. + +//@ ## Sync +//@ In part 12, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not* +//@ talk about the question whether a borrow is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is send. +//@ `&mut` allows moving values back and forth, it is even possible to [`swap`](http://doc.rust-lang.org/beta/std/mem/fn.swap.html) +//@ the contents of two mutably borrowed values. So in terms of concurrency, sending a mutable borrow is very much like +//@ sending full ownership. +//@ +//@ But what about `&T`, a shared borrow? Without interior mutability, it would always be all-right to send such values. +//@ After one, no mutation can be performed, so there can be as many threads accessing the data as we like. In the +//@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for +//@ this purpose: `Sync`. A type `T` is `Sync` if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation +//@ and is thus automatically implemented for a data-structure *if* all its members implement it. +//@ +//@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared borrow is good enough +//@ for cloning, and we don't want other threads to clone our local `Rc`, so it must not be `Sync`. The rule of `Mutex` +//@ is to enforce synchronization, so it should not be entirely surprising that `Mutex` is `Send` *and* `Sync` provided that +//@ `T` is `Send`. +//@ +//@ There's also an example of a type that's `Send`, but not `Sync`: [`RefCell`](http://doc.rust-lang.org/beta/std/cell/struct.RefCell.html). +//@ This type is very much like `RwLock`, but it's not thread-safe: "Locking" is done without atomic operations. +//@ One can also see it as a dynamically checked version of Rust's usual borrowing rules. You have to explicitly say +//@ when you want to borrow the data in there shared, or mutably, and Rust will complain at run-time if you have +//@ a mutable borrow while any other borrow is active. You can then write programs that Rust may otherwise not +//@ accept. Sending a shared borrow to this to another thread is dangerous, as the checks are not performed in +//@ a thread-safe manner. However, sending the *entire* `RefCell` is okay, because there's only ever one owner, and all +//@ we need to ensure is that everybody attempting to borrow is in the same thread as the owner. +//@ +//@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples +//@ of such types, but that's not a topic I want to go into. In case you are curious, there's a +//@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`. +//@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic. + +//@ [index](main.html) | [previous](part13.html) | [next](main.html) diff --git a/workspace/src/main.rs b/workspace/src/main.rs index 7e7c200..26e9eed 100644 --- a/workspace/src/main.rs +++ b/workspace/src/main.rs @@ -15,6 +15,7 @@ mod part10; mod part11; mod part12; mod part13; +mod part14; // This decides which part is actually run. fn main() { diff --git a/workspace/src/part14.rs b/workspace/src/part14.rs new file mode 100644 index 0000000..37afcbc --- /dev/null +++ b/workspace/src/part14.rs @@ -0,0 +1,69 @@ +// Rust-101, Part 14: Mutex, Sync (WIP) +// ============================== + +use std::sync::{Arc, Mutex}; +use std::thread; + + +// The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter. +#[derive(Clone)] +struct ConcurrentCounter(Arc>); + +impl ConcurrentCounter { + // The constructor should not be surprising. + pub fn new(val: usize) -> Self { + ConcurrentCounter(Arc::new(Mutex::new(val))) + } + + pub fn increment(&self, by: usize) { + // `lock` on a mutex returns a *guard*, giving access to the data contained in the mutex. + let mut counter = self.0.lock().unwrap(); + *counter = *counter + by; + } + + pub fn get(&self) -> usize { + let counter = self.0.lock().unwrap(); + *counter + } +} + +// Now our counter is ready for action. +pub fn main() { + let counter = ConcurrentCounter::new(0); + + // We clone the counter for the first thread, which increments it by 2 every 15ms. + let counter1 = counter.clone(); + let handle1 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(15); + counter1.increment(2); + } + }); + + // The second thread increments the counter by 3 every 20ms. + let counter2 = counter.clone(); + let handle2 = thread::spawn(move || { + for _ in 0..10 { + thread::sleep_ms(20); + counter2.increment(3); + } + }); + + // Now we want to watch the threads working on the counter. + for _ in 0..50 { + thread::sleep_ms(5); + println!("Current value: {}", counter.get()); + } + + // Finally, wait for all the threads to finish to be sure we can catch the counter's final value. + handle1.join().unwrap(); + handle2.join().unwrap(); + println!("Final value: {}", counter.get()); +} + +// **Exercise 14.1**: Besides `Mutex`, there's also [`RwLock`](http://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which +// provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one +// for exclusive write access. (Notice that this is the same pattern we already saw with shared vs. mutable borrows.) Change +// the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time. + + -- 2.30.2