src/part15.rs

   1 // Rust-101, Part 15: Mutex, Interior Mutability (cont.), RwLock, Sync
   2 // ===================================================================
   3
   4 use std::sync::{Arc, Mutex};
   5 use std::thread;
   6 use std::time::Duration;
   7
   8 //@ We already saw that we can use `Arc` to share memory between threads. However, `Arc` can only provide *read-only*
   9 //@ access to memory: Since there is aliasing, Rust cannot, in general, permit mutation. To implement shared-memory
  10 //@ concurrency, we need to have aliasing and mutation - following, of course, some strict rules to make sure
  11 //@ there are no data races. In Rust, shared-memory concurrency is obtained through *interior mutability*,
  12 //@ which we already discussed in a single-threaded context in part 12.
  13 //@
  14 //@ ## `Mutex`
  15 //@ The most basic type for interior mutability that supports concurrency is [`Mutex<T>`](https://doc.rust-lang.org/stable/std/sync/struct.Mutex.html).
  16 //@ This type implements *critical sections* (or *locks*), but in a data-driven way: One has to specify
  17 //@ the type of the data that's protected by the mutex, and Rust ensures that the data is *only* accessed
  18 //@ through the mutex. In other words, "lock data, not code" is actually enforced by the type system, which
  19 //@ becomes possible because of the discipline of ownership and borrowing.
  20 //@
  21 //@ As an example, let us write a concurrent counter. As usual in Rust, we first have to think about our data layout:
  22 //@ That will be `Mutex<usize>`. Of course, we want multiple threads to have access to this `Mutex`, so we wrap it in an `Arc`.
  23 //@
  24 //@ Rather than giving every field a name, a struct can also be defined by just giving a sequence of types (similar
  25 //@ to how a variant of an `enum` is defined). This is called a *tuple struct*. It is often used when constructing
  26 //@ a *newtype*, as we do here: `ConcurrentCounter` is essentially just a new name for `Arc<Mutex<usize>>`. However,
  27 //@ it is a locally declared type, so we can give it an inherent implementation and implement traits for it. Since the
  28 //@ field is private, nobody outside this module can even know the type we are wrapping.
  29
  30 // The derived `Clone` implementation will clone the `Arc`, so all clones will actually talk about the same counter.
  31 #[derive(Clone)]
  32 struct ConcurrentCounter(Arc<Mutex<usize>>);
  33
  34 impl ConcurrentCounter {
  35     // The constructor just wraps the constructors of `Arc` and `Mutex`.
  36     pub fn new(val: usize) -> Self {
  37         ConcurrentCounter(Arc::new(Mutex::new(val)))                /*@*/
  38     }
  39
  40     // The core operation is, of course, `increment`.
  41     pub fn increment(&self, by: usize) {
  42         // `lock` on a mutex returns a guard, very much like `RefCell`. The guard gives access to the data contained in the mutex.
  43         //@ (We will discuss the `unwrap` soon.) `.0` is how we access the first component of a tuple or a struct.
  44         let mut counter = self.0.lock().unwrap();
  45         //@ The guard is a smart pointer to the content.
  46         *counter = *counter + by;
  47         //@ At the end of the function, `counter` is dropped and the mutex is available again.
  48         //@ This can only happen when full ownership of the guard is given up. In particular, it is impossible for us
  49         //@ to take a reference to some of its content, release the lock of the mutex, and subsequently access the protected data without holding
  50         //@ the lock. Enforcing the locking discipline is expressible in the Rust type system, so we don't have to worry
  51         //@ about data races *even though* we are mutating shared memory!
  52         //@
  53         //@ One of the subtle aspects of locking is *poisoning*. If a thread panics while it holds a lock, it could leave the
  54         //@ data-structure in a bad state. The lock is hence considered *poisoned*. Future attempts to `lock` it will fail.
  55         //@ Above, we simply assert via `unwrap` that this will never happen. Alternatively, we could have a look at the poisoned
  56         //@ state and attempt to recover from it.
  57     }
  58
  59     // The function `get` returns the current value of the counter.
  60     pub fn get(&self) -> usize {
  61         let counter = self.0.lock().unwrap();                       /*@*/
  62         *counter                                                    /*@*/
  63     }
  64 }
  65
  66 // Now our counter is ready for action.
  67 pub fn main() {
  68     let counter = ConcurrentCounter::new(0);
  69
  70     // We clone the counter for the first thread, which increments it by 2 every 15ms.
  71     let counter1 = counter.clone();
  72     let handle1 = thread::spawn(move || {
  73         for _ in 0..10 {
  74             thread::sleep(Duration::from_millis(15));
  75             counter1.increment(2);
  76         }
  77     });
  78
  79     // The second thread increments the counter by 3 every 20ms.
  80     let counter2 = counter.clone();
  81     let handle2 = thread::spawn(move || {
  82         for _ in 0..10 {
  83             thread::sleep(Duration::from_millis(20));
  84             counter2.increment(3);
  85         }
  86     });
  87
  88     // Now we watch the threads working on the counter.
  89     for _ in 0..50 {
  90         thread::sleep(Duration::from_millis(5));
  91         println!("Current value: {}", counter.get());
  92     }
  93
  94     // Finally, we wait for all the threads to finish to be sure we can catch the counter's final value.
  95     handle1.join().unwrap();
  96     handle2.join().unwrap();
  97     println!("Final value: {}", counter.get());
  98 }
  99
 100 // **Exercise 15.1**: Add an operation `compare_and_inc(&self, test: usize, by: usize)` that increments the counter by
 101 // `by` *only if* the current value is `test`.
 102 //
 103 // **Exercise 15.2**: Rather than panicking in case the lock is poisoned, we can use `into_inner` on the error to recover
 104 // the data inside the lock. Change the code above to do that. Try using `unwrap_or_else` for this job.
 105
 106 //@ ## `RwLock`
 107 //@ Besides `Mutex`, there's also [`RwLock`](https://doc.rust-lang.org/stable/std/sync/struct.RwLock.html), which
 108 //@ provides two ways of locking: One that grants only read-only access, to any number of concurrent readers, and another one
 109 //@ for exclusive write access. Notice that this is the same pattern we already saw with shared vs. mutable references. Hence
 110 //@ another way of explaining `RwLock` is to say that it is like `RefCell`, but works even for concurrent access. Rather than
 111 //@ panicking when the data is already borrowed, `RwLock` will of course block the current thread until the lock is available.
 112 //@ In this view, `Mutex` is a stripped-down version of `RwLock` that does not distinguish readers and writers.
 113
 114 // **Exercise 15.3**:  Change the code above to use `RwLock`, such that multiple calls to `get` can be executed at the same time.
 115
 116 //@ ## `Sync`
 117 //@ Clearly, if we had used `RefCell` rather than `Mutex`, the code above could not work: `RefCell` is not prepared for
 118 //@ multiple threads trying to access the data at the same time. How does Rust make sure that we don't accidentally use
 119 //@ `RefCell` across multiple threads?
 120 //@
 121 //@ In part 13, we talked about types that are marked `Send` and thus can be moved to another thread. However, we did *not*
 122 //@ talk about the question whether a reference is `Send`. For `&mut T`, the answer is: It is `Send` whenever `T` is `Send`.
 123 //@ `&mut` allows moving values back and forth, it is even possible to [`swap`](https://doc.rust-lang.org/stable/std/mem/fn.swap.html)
 124 //@ the contents of two mutable references. So in terms of concurrency, sending a mutable, unique reference is very much like
 125 //@ sending full ownership, in the sense that it can be used to move the object to another thread.
 126 //@
 127 //@ But what about `&T`, a shared reference? Without interior mutability, it would always be all right to send such values.
 128 //@ After all, no mutation can be performed, so there can be as many threads accessing the data as we like. In the
 129 //@ presence of interior mutability though, the story gets more complicated. Rust introduces another marker trait for
 130 //@ this purpose: `Sync`. A type `T` is `Sync` if and only if `&T` is `Send`. Just like `Send`, `Sync` has a default implementation
 131 //@ and is thus automatically implemented for a data-structure *if* all its members implement it.
 132 //@
 133 //@ Since `Arc` provides multiple threads with a shared reference to its content, `Arc<T>` is only `Send` if `T` is `Sync`.
 134 //@ So if we had used `RefCell` above, which is *not* `Sync`, Rust would have caught that mistake. Notice however that
 135 //@ `RefCell` *is* `Send`: If ownership of the entire cell is moved to another thread, it is still not possible for several
 136 //@ threads to try to access the data at the same time.
 137 //@
 138 //@ Almost all the types we saw so far are `Sync`, with the exception of `Rc`. Remember that a shared reference is good enough
 139 //@ for cloning, and we don't want other threads to clone our local `Rc` (they would race for updating the reference count),
 140 //@ so it must not be `Sync`. The role of `Mutex` is to enforce synchronization, so it should not be entirely surprising that
 141 //@ `Mutex<T>` is `Send` *and* `Sync` provided that `T` is `Send`.
 142 //@
 143 //@ You may be curious whether there is a type that's `Sync`, but not `Send`. There are indeed rather esoteric examples
 144 //@ of such types, but that's not a topic I want to go into. In case you are curious, there's a
 145 //@ [Rust RFC](https://github.com/rust-lang/rfcs/blob/master/text/0458-send-improvements.md), which contains a type `RcMut` that would be `Sync` and not `Send`.
 146 //@ You may also be interested in [this blog post](https://huonw.github.io/blog/2015/02/some-notes-on-send-and-sync/) on the topic.
 147
 148 //@ [index](main.html) | [previous](part14.html) | [raw source](workspace/src/part15.rs) | [next](part16.html)