From 187a862323d9120c30bd4824ad6f8760e84c91a4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 22 Dec 2018 13:44:51 +0100 Subject: [PATCH 01/16] update Stacked Borrows post to (almost) current model --- ...18-11-16-stacked-borrows-implementation.md | 162 +++++++++--------- 1 file changed, 79 insertions(+), 83 deletions(-) diff --git a/personal/_posts/2018-11-16-stacked-borrows-implementation.md b/personal/_posts/2018-11-16-stacked-borrows-implementation.md index df60719..35bf735 100644 --- a/personal/_posts/2018-11-16-stacked-borrows-implementation.md +++ b/personal/_posts/2018-11-16-stacked-borrows-implementation.md @@ -147,8 +147,8 @@ fn demo0() { Well, actually having undefined behavior here is good news, since that's what we wanted from the start! And since there is an implementation of the model in -[miri](https://github.com/solson/miri/), you can try this yourself: The amazing -@shepmaster has integrated miri into the playground, so you can +[Miri](https://github.com/solson/miri/), you can try this yourself: The amazing +@shepmaster has integrated Miri into the playground, so you can [put the example there](https://play.rust-lang.org/?version=stable&mode=debug&edition=2015&gist=d15868687f79072688a0d0dd1e053721) (adjusting it slightly to circumvent the borrow checker), then select "Tools - Miri" and it will complain (together with a rather unreadable backtrace, we sure @@ -202,7 +202,7 @@ fn demo2() { {% endhighlight %} If you -[try this in miri](https://play.rust-lang.org/?version=stable&mode=debug&edition=2015&gist=1bc8c2f432941d02246fea0808e2e4f4), +[try this in Miri](https://play.rust-lang.org/?version=stable&mode=debug&edition=2015&gist=1bc8c2f432941d02246fea0808e2e4f4), you will see it complain: ``` @@ -266,7 +266,7 @@ stack of that location. fn demo1() { let x = &mut 1u8; // tag: `Uniq(0)` // stack: [Uniq(0)]; not frozen - + let y1 = &*x; // tag: `Shr(Some(1))` // stack: [Uniq(0), Shr]; frozen since 1 @@ -337,11 +337,11 @@ before any memory access happens. The operation on a dereference never mutates the stack, but it performs some basic checks that might declare the program UB. The reason for this is twofold: First of all, I think we should require some basic validity for pointers that are dereferenced even when they do not access -memory. Secondly, there is the practical concern for the implementation in miri: +memory. Secondly, there is the practical concern for the implementation in Miri: When we dereference a pointer, we are guaranteed to have type information available (crucial for things that depend on the presence of an `UnsafeCell`), whereas having type information on every memory access would be quite hard to -achieve in miri. +achieve in Miri. Notice that on a dereference, we have *both* a tag at the pointer *and* the type of a pointer, and the two might not agree, which we do not always want to rule @@ -492,16 +492,11 @@ refine these instructions later) on all locations covered by the reference 3. Perform the actions that would also happen when an actual access happens through this reference (for shared references a read access, for mutable references a write access). -4. If the new tag is `Uniq`, push it onto the stack. (The location cannot be - frozen: `Uniq` tags are only created for mutable references, and we just - performed the actions of a write access to memory, which unfreezes - locations.) -5. If the new tag is `Shr`: - - If the location is already frozen, we do nothing. - - Otherwise: - 1. Push a `Shr` item to the stack. - 2. If the location is outside of `UnsafeCell`, it gets frozen with the - timestamp of the new reference. +4. Check if the new tag is `Shr(Some(t))` and the location is inside an `UnsafeCell`. + - If both conditions apply, freeze the location with timestamp `t`. If it + is already frozen, do nothing. + - Otherwise, push a new item onto the stack: `Shr` if the tag is a `Shr(_)`, + `Uniq(id)` if the tag is `Uniq(id)`. One high-level way to think about retagging is that it computes a fresh tag, and then performs a reborrow of the old reference with the new tag. @@ -513,9 +508,12 @@ create raw pointers, and if we are careful enough, use them to access a location from different aliasing pointers. (Of course, "careful enough" is not very precise, but the precise answer is the very model I am describing here.) -To account for this, we need one final ingredient in our model: a special -instruction that indicates that a reference was cast to a raw pointer, and may -thus be accessed from these raw pointers in a shared way. Consider the +To account for this, we consider the act of casting to a raw pointer as a +special way of creating a reference +(["creating a raw reference"](https://github.com/rust-lang/rfcs/pull/2582), so +to speak). As usual for creating a new reference, that operation is followed by +retagging. This retagging is special though because unlike normal retagging it +acts on raw pointers. Consider the [following example](https://play.rust-lang.org/?version=stable&mode=debug&edition=2015&gist=253868e96b7eba85ef28e1eabd557f66): {% highlight rust %} @@ -524,11 +522,11 @@ fn demo4() { Retag(x); // tag of `x` gets changed to `Uniq(0)` // stack: [Uniq(0)]; not frozen + let y1 = x as *mut u8; // Make sure what `x` points to is accessible through raw pointers. - EscapeToRaw(x); + Retag([raw] y1) // tag of `y` gets erased to `Shr(None)` // stack: [Uniq(0), Shr]; not frozen - - let y1 = x as *mut u8; // tag erased: `Shr(None)` + let y2 = y1; unsafe { // All of these first dereference a raw pointer (no checks, tag gets @@ -551,19 +549,19 @@ fn demo4() { } {% endhighlight %} -The behavior of `EscapeToRaw` is best described as "reborrowing for a raw -pointer": The steps are the same as for `Retag` above, except that the new -pointer's tag is `Shr(None)` and we do not freeze (i.e., we behave as if the -entire pointee was inside an `UnsafeCell`). +`Retag([raw])` acts almost like a normal retag, except that it does not ignore +raw pointers and instead tags them `Shr(None)`, and pushes `Shr` to the stack. -This is needed, because when casting a reference to a raw pointer (and actually, -on any cast involving pointers), the tag gets erased, meaning it gets reset to -`Shr(None)`. Thanks to `EscapeToRaw`, there is a matching `Shr` on the stack, -making sure the raw pointer can actually be used. +This way, even if the program casts the pointer to an integer and back (where we +cannot always keep track of the tag, so it might get reset to `Shr(None)`), +there is a matching `Shr` on the stack, making sure the raw pointer can actually +be used. One way to think about this is to consider the reference to "escape" +when it is cast to a raw pointer, which is reflected by the `Shr` item on the +stack. -Knowing about both `Retag` and `EscapeToRaw`, you can now go back to `demo2` and -should be able to fully explain why the stack changes the way it does in that -example. +Knowing about how `Retag` interacts with raw pointers, you can now go back to +`demo2` and should be able to fully explain why the stack changes the way it +does in that example. ### 3.3 The Case of the Aliasing References @@ -614,10 +612,10 @@ it got created, we have no choice but pop the item matching `mut_ref` off the stack. Ouch. This made me realize that creating a shared reference has to be very weak inside -`UnsafeCell`. In fact, it is entirely equivalent to `EscapeToRaw`: We just have -to make sure some kind of shared access is possible, but we have to accept that -there might be active mutable references assuming exclusive access to the same -locations. That on its own is not enough, though. +`UnsafeCell`. In fact, it is entirely equivalent to `Retag([raw])`: We just +have to make sure some kind of shared access is possible, but we have to accept +that there might be active mutable references assuming exclusive access to the +same locations. That on its own is not enough, though. I also added a new check to the retagging procedure: Before taking any action (i.e., before step 3, which could pop items off the stack), we check if the @@ -649,28 +647,28 @@ except for `UnsafeCell`). Moreover, when pushing an item to the stack (at the end of the retag action), we can now be sure that the stack is not yet frozen: if it were frozen, the reborrow would be redundant. -With this extension, the instructions for retagging and `EscapeToRaw` now look -as follows (again executed on all locations covered by the reference, according -to `size_of_val`): +With this extension, the instructions for `Retag` now look as follows (again +executed on all locations covered by the reference, according to `size_of_val`): -1. Compute a fresh tag: `Uniq(_)` for mutable references and `Box`, - `Shr(Some(_))` for shared references, `Shr(None)` if this is `EscapeToRaw`. +1. Compute a fresh tag: `Uniq(_)` for mutable references, `Box`, `Shr(Some(_))` + for shared references, and `Shr(None)` for raw pointers. 2. Perform the checks that would also happen when we dereference this reference. Remember the position of the item matching the tag in the stack. 3. Redundancy check: If the new tag passes the checks performed on a dereference, and if the item that makes this check succeed is *above* the one we remembered in step 2 (where the "frozen" state is considered above every - item in the stack), then we stop. We are done for this location. + item in the stack), then stop. We are done for this location. 4. Perform the actions that would also happen when an actual access happens through this reference (for shared references a read access, for mutable references a write access).
Now the location cannot be frozen any more: If the fresh tag is `Uniq`, we just unfroze; if the fresh tag is `Shr` and the location was already frozen, then the redundancy check (step 3) would have kicked in. -5. If the new tag is `Uniq`, push it onto the stack. -6. If the new tag is `Shr`, push a `Shr` item to the stack. Then, if the - location is outside of `UnsafeCell`, it gets frozen with the timestamp of the - new reference. +5. Check if the new tag is `Shr(Some(t))` and the location is inside an `UnsafeCell`. + - If both conditions apply, freeze the location with timestamp `t`. If it + is already frozen, do nothing. + - Otherwise, push a new item onto the stack: `Shr` if the tag is a `Shr(_)`, + `Uniq(id)` if the tag is `Uniq(id)`. The one thing I find slightly unsatisfying about the redundancy check is that it seems to overlap a bit with the rule that on a *read* access, a `Shr` item @@ -709,25 +707,9 @@ The tag is now "typed" (`Uniq` vs `Shr`) to be able to support `transmute` between references and shared pointers. Such `transmute` were an open question in the original model and some people raised concerns about it in the ensuing discussion. I invite all of you to come up with strange things you think you -should be able to `transmute` and throw them at miri so that we can see if your +should be able to `transmute` and throw them at Miri so that we can see if your use-cases are covered. :) -Creating a shared reference now always pushes a `Shr` item onto the stack, even -when there is no `UnsafeCell`. This means that starting with a mutable reference -`x`, `&*x as *const _ as *mut _` is pretty much equivalent to `x as *mut _`; the -fact that we have an intermediate shared reference does not matter (not for the -aliasing model, anyway). During the implementation, I realized that in `x as -*const _` on a mutable reference, `x` actually first gets coerced to shared -reference, which then gets cast to a raw pointer. This happens in -`NonNull::from`, so if you later write to that `NonNull`, you end up writing to -a raw pointer that was created from a shared reference. Originally I intended -this to be strictly illegal. This is writing to a shared reference after all, -how dare you! However, it turns out it's actually no big deal *if the shared -reference does not get used again later*. This is an access-based model after -all, if a reference never gets used again we do not care much about enforcing -any guarantees for it. (This is another example of a coincidental fix, where I -had a surprisingly passing test case and then investigated what happened.) - The redundancy check during retagging can be seen as refining a similar check that the original model did whenever a new reference was created (where we wouldn't change the state if the new borrow is already active). @@ -836,22 +818,22 @@ reasoning about the case where we call some code via FFI that is written in a language without a notion of "dereferencing", all we care about is the actual memory accesses performed by that foreign code. This also indicates that we could see the checks on pointer dereference as another "shadow state operation" -next to `Retag` and `EscapeToRaw`, and then these three operations plus the -actions on memory accesses are all that there is to Stacked Borrows. This is -difficult to implement in miri because dereferences can happen any time a path -is evaluated, but it is nevertheless interesting and might be useful in a -"lower-level MIR" that does not permit dereferences in paths. +next to `Retag`, and then these two operations plus the actions on memory +accesses are all that there is to Stacked Borrows. This is difficult to +implement in Miri because dereferences can happen any time a path is evaluated, +but it is nevertheless interesting and might be useful in a "lower-level MIR" +that does not permit dereferences in paths. ## 6 Evaluation, and How You Can Help [section 6]: #6-evaluation-and-how-you-can-help I have implemented both the validity invariant and the model as described above -in miri. This [uncovered](https://github.com/rust-lang/rust/issues/54908) two +in Miri. This [uncovered](https://github.com/rust-lang/rust/issues/54908) two [issues](https://github.com/rust-lang/rust/issues/54957) in the standard library, but both were related to validity invariants, not Stacked Borrows. With these exceptions, the model passes the entire test suite. There were some more test failures in earlier versions (as mentioned in [section 4]), but the -final model accepts all the code covered by miri's test suite. (If you look +final model accepts all the code covered by Miri's test suite. (If you look close enough, you can see that three libstd methods are currently whitelisted and what they do is not checked. However, even before I ran into these cases, [efforts](https://github.com/rust-lang/rust/pull/54668) were already @@ -860,37 +842,46 @@ them, so I am not concerned about them.) Moreover I wrote a bunch of compile-fail tests to make sure the model catches various violations of the key properties it should ensure. -I am quite happy with this! I was expecting much more trouble, expecting to run +The most interesting change I had to make to libstd is +[in `NonNull::from`](https://github.com/rust-lang/rust/pull/56161). That +function turned a `&mut T` into a `*const T` going through a `&T`. This means +that the final raw pointer was created from a shared reference, and hence must +not be used for mutation. An earlier version of this post described a model +that would permit such behavior, but I think we should actually at least +experiment with ruling it out: "no mutation through (pointers derived from) +shared references" is an old rule in Rust, after all. + +Overall, I am quite happy with this! I was expecting much more trouble, expecting to run into cases where libstd does strange things that are common or otherwise hard to declare illegal and that my model could not reasonably allow. I see the test suite passing as an indication that this model may be well-suited for Rust. -However, miri's test suite is tiny, and I have but one brain to come up with +However, Miri's test suite is tiny, and I have but one brain to come up with counterexamples! In fact I am quite a bit worried because I literally came up with `demo_refcell` less than two weeks ago, so what else might I have missed? This where you come in. Please test this model! Come up with something funny you think should work (I am thinking about funny `transmute` in particular, using type punning through unions or raw pointers if you prefer that), or maybe you have some crate that has some unsafe code and a test suite (you do have a -test suite, right?) that might run under miri. +test suite, right?) that might run under Miri. The easiest way to try the model is the [playground](https://play.rust-lang.org/): Type the code, select "Tools - Miri", and you'll see what it does. -For things that are too long for the playground, you have to install miri on -your own computer. miri depends on rustc nightly and has to be updated +For things that are too long for the playground, you have to install Miri on +your own computer. Miri depends on rustc nightly and has to be updated regularly to keep working, so it is not well-suited for crates.io. Instead, -installation instructions for miri are provided +installation instructions for Miri are provided [in the README](https://github.com/solson/miri/#running-miri). We are still -working on making installing miri easier. Please let me know if you are having +working on making installing Miri easier. Please let me know if you are having trouble with anything. You can report issues, comment on this post or find me in chat (as of recently, I am partial to Zulip where we have an [unsafe code guidelines stream](https://rust-lang.zulipchat.com/#narrow/stream/136281-wg-unsafe-code-guidelines)). -With miri installed, you can `cargo miri` a project with a binary to run it in -miri. Dependencies should be fully supported, so you can use any crate you -like. It is not unlikely, however, that you will run into issues because miri +With Miri installed, you can `cargo miri` a project with a binary to run it in +Miri. Dependencies should be fully supported, so you can use any crate you +like. It is not unlikely, however, that you will run into issues because Miri does not support some operation. In that case please search the [issue tracker](https://github.com/solson/miri/issues) and report the issue if it is new. We cannot support everything, but we might be able to do something @@ -901,11 +892,12 @@ that [here are some details](https://github.com/solson/miri/issues/479). Moreover, wouldn't it be nice if we could [run the entire libcore, liballoc and libstd test suite in miri](https://github.com/rust-lang/rust/issues/54914)? There are tons of interesting cases of Rust's core data structures being -exercise there, and the comparatively tiny miri test suite has already helped to +exercise there, and the comparatively tiny Miri test suite has already helped to find two soundness bugs, so there are probably more. Once `cargo miri test` works again, it would be great to find a way to run it on the standard library test suites, and set up something so that this happens automatically on a regular basis (so that we notice regressions). +**Update:** `cargo miri test` has been fixed in the mean time, so you can use it on your libraries now! **/Update** As you can see, there is more than enough work for everyone. Don't be shy! I have a mere two weeks left on this internship, after which I will have to @@ -914,7 +906,7 @@ disappear entirely though, don't worry -- I will still be able to mentor you if you want to help with any of the above tasks. :) Thanks to @nikomatsakis for feedback on a draft of this post, to @shepmaster for -making miri available on the playground, and to @oli-obk for reviewing all my +making Miri available on the playground, and to @oli-obk for reviewing all my PRs at unparalleled speed. <3 If you want to @@ -927,3 +919,7 @@ comments, please join the **2018-11-21:** Dereferencing a pointer now always preserves the tag, but casting to a raw pointer resets the tag to `Shr(None)`. `Box` is treated like a mutable reference. + +**2018-12-22:** Creating a shared reference does not push a `Shr` item to the +stack (unless there is an `UnsafeCell`). Moreover, creating a raw pointer is a +special kind of retagging. -- 2.39.5 From 725435c183189caf8563d2dbbf3f4cccf3503103 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 26 Dec 2018 16:57:03 +0100 Subject: [PATCH 02/16] add latest stacked borrows updates --- personal/_posts/2018-12-12-google-scholar.md | 2 + .../2018-12-26-stacked-borrows-barriers.md | 157 ++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 personal/_posts/2018-12-26-stacked-borrows-barriers.md diff --git a/personal/_posts/2018-12-12-google-scholar.md b/personal/_posts/2018-12-12-google-scholar.md index 81c7090..28a4806 100644 --- a/personal/_posts/2018-12-12-google-scholar.md +++ b/personal/_posts/2018-12-12-google-scholar.md @@ -8,6 +8,8 @@ I even found one vacancy that explicitly requested submitting a Google Scholar p Google Scholar provides not just a list of publications (which is of course part of the application, and which is also [available on dblp](http://dblp.org/pers/hd/j/Jung_0002:Ralf)), but also a citation count and computation of several publication-related indices. This post is about why I don't have a Google Scholar profile (yet). + + First of all, having a Google Scholar profile requires a Google account. There is no technical necessity for this, Google already indexes my papers and other databases (like the aforementioned dblp) manage to create per-author pages just fine without authors having to have an account. But, of course, this is a great way for Google to tie more people into their ecosystem -- few kinds of pressure are as effective as when this directly affects hiring decisions. diff --git a/personal/_posts/2018-12-26-stacked-borrows-barriers.md b/personal/_posts/2018-12-26-stacked-borrows-barriers.md new file mode 100644 index 0000000..085a97a --- /dev/null +++ b/personal/_posts/2018-12-26-stacked-borrows-barriers.md @@ -0,0 +1,157 @@ +--- +title: "Barriers and Two-phase Borrows in Stacked Borrows" +categories: internship rust +--- + +My internship ("research assistantship") with Mozilla has ended several weeks ago, and this post is a report of the most recent tweaks I made to Miri and Stacked Borrows. +Neither project is by any means "done", of course. +However, both have reached a fairly reasonable state, so I felt some kind of closing report made sense. +Also, if I ever want to finish my PhD, I'll have to seriously scale down the amount of time I work on Rust -- so at least from my side, things will move more slowly from now on. + + + +## Stacked Borrows Tweaks + +I made some small tweaks to Stacked Borrows which mostly serve to make things overall more consistent. +All of these have been incorporated into my [previous post on Stacked Borrows]({% post_url 2018-11-16-stacked-borrows-implementation %}). + +The only noticeable functional change affects creating a shared reference: previously, that would push a `Shr` item to the stack before freezing the location. +That would, however, permit mutation like in the following example: + +{% highlight rust %} +fn demo_shared_mutation(ptr: &mut u8) { + let shared = &*ptr; // pushes a `Shr` item + let raw = shared as *const u8 as *mut u8; + unsafe { *raw = 5; } // allowed: unfreezing, then matching the `Shr` item +} +{% endhighlight %} + +Since "no mutation through (pointers derived from) shared references" has been a rule in Rust pretty much from the beginning, I changed creating a shared reference to no longer unconditionally push a `Shr` item. +That now only happens when the pointee is mutable because of an `UnsafeCell`. +As a consequence, the code above is now forbidden. + +## Barriers + +Beyond these small tweaks, I implemented the *barriers* that I already described in my [original proposal]({% post_url 2018-08-07-stacked-borrows %}). +The purpose of barriers is to handle code like the following: + +{% highlight rust %} +fn aliasing(ptr: &mut u8, raw: *mut u8) -> u8 { + let val = *ptr; + unsafe { *raw = 4; } + val // we would like to move the `ptr` load down here +} + +fn demo_barrier() -> u8 { + let x = &mut 8u8; + let raw = x as *mut u8; + aliasing(unsafe { &mut *raw }, raw) +} +{% endhighlight %} + +`aliasing` here gets two aliasing pointers as argument. +This means writing to `raw` changes the value in `ptr`, so the desired optimization would change the behavior of `demo_barrier` (currently it returns 8, after moving the load of `ptr` down it returns 4). +Stacked Borrows without barriers would allow this code because `ptr` does not get used again after `raw`. + +To disallow this code and enable the desired optimization, we introduce a new kind of item that can be pushed to the per-location stack: a *function barrier* referring to a particular function call. +While the function is running, the barrier may not be popped. +The type of items on the borrow stack therefore now looks as follows: + +{% highlight rust %} +pub enum BorStackItem { + Uniq(Timestamp), + Shr, + FnBarrier(CallId), +} +{% endhighlight %} + +These barriers get pushed when doing the initial retagging of a function (remember that we insert `Retag` statements for all arguments at the beginning of each function). +We do this only for references (not for `Box`), and we do *not* push a barrier to locations inside a shared `UnsafeCell`. +Concretely, between steps 4 and 5 of retagging (after we performed the action of an access and before we push the new item), we push a `FnBarrier` referring to the current function call. +Moreover, when retagging with barriers, we do not perform the usual redundancy check -- even if the reborrow is redundant, pushing the barrier might be necessary. + +Barriers have no effect when a pointer gets dereferenced: looking for the item in the stack just skips over barriers. +However, when performing a memory access and popping items from the stack, we stop when we encounter an active barrier, i.e., a barrier for a function call that is still ongoing. +If that ever happens, we have undefined behavior. + +Together, these rules make the above example invalid code, and hence enable our optimization: when doing the memory access in `*raw = 4`, we pop until we find a `Shr` item. +That will hit the barrier that was added when `aliasing` did the initial retagging of `val`, triggering UB. + +An interesting (and positive!) consequence of barriers is that they make the following code UB: + +{% highlight rust %} +fn aliasing(ptr1: &mut u8, ptr2: &mut u8) {} + +fn demo_barrier() -> u8 { + let x = &mut 8u8; + let raw = x as *mut u8; + aliasing(unsafe { &mut *raw }, x) +} +{% endhighlight %} + +Without barriers, there is no problem with this code because `aliasing` does not actually *use* the two aliasing pointers in conflicting ways. +With barriers, the initial retag of `ptr2` goes wrong because it tries to pop off the barrier that just got pushed for `ptr1`. +That's great, because we don't actually want to allow passing two aliasing pointers to `aliasing`. + +Barriers have one more effect: when memory gets deallocated, it must not have any active barriers left in its stacks. +Barriers so far ensure that a reference passed to a function does not get *invalidated* during that function call; this extra check ensures that it also does not get *deallocated*. +This is important, because we tell LLVM via the `dereferencable` attribute that the reference is, well, dereferencable for the duration of the function call. +To justify this attribute, we have to argue that the program really cannot deallocate this memory + + +### Intuition and Design Decisions + +One high-level intuition for barriers is that they encode the rule that a reference passed to a function outlives the function call. +The item matching that reference can never get popped while the function is still being executed: if it would get popped, we would hit the barrier and trigger UB. +This is also why we don't have barriers for `Box`: they don't have any such relationship to function calls. + +The other exception, about not having barriers for shared `UnsafeCell`, is more subtle. +In my [previous post]({% post_url 2018-11-16-stacked-borrows-implementation %}), I had an example of a function where a shared and a mutable reference alias (the shared reference being `&RefCell` and the mutable reference pointing into it). +This example crucially relies on the redundancy check during retagging, where the shared reference does *not* get reborrowed (which would invalidate the mutable reference). +If we wanted to add a barrier for that shared reference, we could not push it in top of the stack; we would have to add it somewhere in the middle next to the `Shr` item matching this reference. +For now, I am trying to avoid adding items to the middle of the stack. + +Another problem with shared `UnsafeCell` and barriers is summarized in [this issue](https://github.com/rust-lang/rust/issues/55005): in `Arc`, it can actually be the case that the memory referenced by a shared reference (but inside an `UnsafeCell`) gets deallocated while the function (that got this shared reference as argument) is still running. +This violated LLVM's expectations for `dereferencable` pointers, and it would also violate the rule that memory with active barriers cannot be deallocated. +We will have to either declare the current implementation of `Arc` incorrect or change the attributes that we use with LLVM. +The model currently takes the position that `Arc` is correct, deallocating data behind a shared `UnsafeCell` is allowed, and the attribute is emitted incorrectly. + +## Two-phase Borrows + +During the RustFest Rome, I was reminded that Stacked Borrows does not currently support [two-phase borrows](http://smallcultfollowing.com/babysteps/blog/2017/03/01/nested-method-calls-via-two-phase-borrowing/). +This means that the following code (which is safe code in the 2018 edition) would get rejected by Miri: +{% highlight rust %} +fn two_phase() { + let mut v = vec![]; + v.push(v.len()); +} +{% endhighlight %} + +To fix that, I just had to make a very tiny change. +Retagging now has a special mode for two-phase borrows (it would be used in the above example when retagging the temporary containing the first argument of `push`), and in that mode, after completing all the usual steps, we create a shared reborrow of the newly created mutable borrow, with all the usual effects (freezing or pushing a `Shr` onto the stack. +Thanks to the rule that reading through a mutable reference does not unfreeze or pop a `Shr` off the stack, this means that the original `v` can be used for reading until the time that the newly created (two-phase) borrow is written to for the first time. +Writing will unfreeze and pop the `Shr` off the stack, disallowing any further reads through other references. +This corresponds to the "activation" of a two-phase borrow. + +Unfortunately, this model is not quite enough to handle all the code that rustc currently accepts. +See [this issue](https://github.com/rust-lang/rust/issues/56254) for further discussion on this subject. + +## Stacked Borrows Specification + +Between the various changes described here, there is now a noticeable discrepancy between the only self-contained description of Stacked Borrows in my previous blog post(s) and what is actually implemented. +I will try to find a good spot to put a "living description" of whatever version of Stacked Borrows is currently implemented in Miri, so that there is more than just the code to look at. +I'll let you know when I found one! + +## Making Miri Easier to Use + +Finally, I decided to spend some time making Miri easier to install and use and fixing the problems with running test suites. +Now all you have to do is run + +``` +cargo +nightly install --git https://github.com/solson/miri/ miri +``` + +and then go to your project directory and run `cargo +nightly miri test` (you may have to `cargo clean` first to make sure all your dependencies are built the right way for Miri). +On first launch, Miri will (with your consent) install xargo and prepare a libstd suited for executing Rust programs in the interpreter, and then it will run your test suite. +@oli-obk is currently working on making this even easier by making Miri a component that you can install via `rustup`. +Give it a try and let us know what you think! -- 2.39.5 From 783a9c33e3429c2822ed1971c1985293e817be44 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 26 Dec 2018 17:01:03 +0100 Subject: [PATCH 03/16] advertise Miri a bit more --- personal/_posts/2018-12-26-stacked-borrows-barriers.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/personal/_posts/2018-12-26-stacked-borrows-barriers.md b/personal/_posts/2018-12-26-stacked-borrows-barriers.md index 085a97a..e68663d 100644 --- a/personal/_posts/2018-12-26-stacked-borrows-barriers.md +++ b/personal/_posts/2018-12-26-stacked-borrows-barriers.md @@ -8,6 +8,9 @@ Neither project is by any means "done", of course. However, both have reached a fairly reasonable state, so I felt some kind of closing report made sense. Also, if I ever want to finish my PhD, I'll have to seriously scale down the amount of time I work on Rust -- so at least from my side, things will move more slowly from now on. +In particular, installing Miri and running your test suite in it is now just a single command away! +Scroll all the way down if you are not interested in the rest. + ## Stacked Borrows Tweaks -- 2.39.5 From 5d22b18eb3e59fdef57f2802ef46798984a6a80f Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 26 Dec 2018 17:03:23 +0100 Subject: [PATCH 04/16] add forum link --- personal/_posts/2018-12-26-stacked-borrows-barriers.md | 1 + 1 file changed, 1 insertion(+) diff --git a/personal/_posts/2018-12-26-stacked-borrows-barriers.md b/personal/_posts/2018-12-26-stacked-borrows-barriers.md index e68663d..a598903 100644 --- a/personal/_posts/2018-12-26-stacked-borrows-barriers.md +++ b/personal/_posts/2018-12-26-stacked-borrows-barriers.md @@ -1,6 +1,7 @@ --- title: "Barriers and Two-phase Borrows in Stacked Borrows" categories: internship rust +forum: https://internals.rust-lang.org/t/barriers-and-two-phase-borrows-in-stacked-borrows/9100 --- My internship ("research assistantship") with Mozilla has ended several weeks ago, and this post is a report of the most recent tweaks I made to Miri and Stacked Borrows. -- 2.39.5 From 53f2e9e7c8011a210ca1291cd1fdc57cfc5c919b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 28 Dec 2018 18:15:43 +0100 Subject: [PATCH 05/16] fix installation command --- personal/_posts/2018-12-26-stacked-borrows-barriers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/personal/_posts/2018-12-26-stacked-borrows-barriers.md b/personal/_posts/2018-12-26-stacked-borrows-barriers.md index a598903..d384a02 100644 --- a/personal/_posts/2018-12-26-stacked-borrows-barriers.md +++ b/personal/_posts/2018-12-26-stacked-borrows-barriers.md @@ -152,7 +152,7 @@ Finally, I decided to spend some time making Miri easier to install and use and Now all you have to do is run ``` -cargo +nightly install --git https://github.com/solson/miri/ miri +cargo +nightly install --force --git https://github.com/solson/miri miri ``` and then go to your project directory and run `cargo +nightly miri test` (you may have to `cargo clean` first to make sure all your dependencies are built the right way for Miri). -- 2.39.5 From bd2acd148e15d0a8d0ae93ac6cbc56b110934087 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 12 Jan 2019 14:09:18 +0100 Subject: [PATCH 06/16] Rust 2019 --- personal/_posts/2019-01-12-rust-2019.md | 76 +++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 personal/_posts/2019-01-12-rust-2019.md diff --git a/personal/_posts/2019-01-12-rust-2019.md b/personal/_posts/2019-01-12-rust-2019.md new file mode 100644 index 0000000..05b4812 --- /dev/null +++ b/personal/_posts/2019-01-12-rust-2019.md @@ -0,0 +1,76 @@ +--- +title: "Rust 2019: Solid Foundations for Unsafe Code" +categories: rust +--- + +It is that time of the year again where our feedreaders are filled with people's visions for what Rust should become in the next year. +Coming up with a vision is not exactly my strong suit, but it is probably something I should learn, so here's my episode of the "Rust 2019" blog post series. + +I think in 2019, we should make a coordinated effort to improving the *foundations for writing unsafe code*. +Of course my particular Rust bubble is mostly about unsafe code, as you will know if you have read some of my previous posts -- but safety is a core value proposition of Rust, and the entire ecosystem rests on a foundation of crates that make heavy use of unsafe code, so I believe that caring about unsafe code is an important piece to Rust's overall success. + + + +## Foundations for unsafe code + +What do I mean by "improving the foundations" of unsafe code? +There are many questions that frequently come up when writing unsafe code, like: + +* What are the rules around handling uninitialized or otherwise invalid data? +* How can I create pointers to fields of packed structs? +* When can I turn raw pointers into references, and what can I do with these references? +* When do I need `UnsafeCell`? +* What can I do with a `union`? + +I am sure there are more, these are just some questions that regularly show up in my inbox because someone @mentions me. +What all of these questions have in common is that *we don't know the full answer*, and even the parts of the answer that we do know are hard to figure out for someone who's new to this. + +So a key ingredient to providing better foundations for writing unsafe code is to start answering some of these questions, and to figure out which other questions need answering. +One issue that any attempt to answer any one of these questions precisely will run into is that we do not have a proper specification for what a Rust program *does* when it is executed -- the semantics of Rust are basically defined by "whatever the generated LLVM IR does", and that's not a very solid foundation at all. +As a consequence, we often even lack the terminology to make precise statements about what unsafe code can and cannot do. +What we need is a specification of at least a large enough fragment of Rust to serve as the framework in which these other discussions can take place. + +But that's just the beginning: we can't just dump a bunch of rules onto programmers and let them deal with the rest. +We need to help them write code that follows the rules. +To this end, we need more documentation and teaching material -- things like the [Rustonomicon](https://doc.rust-lang.org/nightly/nomicon/). + +But I strongly believe that we can't leave it at that. +We should also provide *tooling* that helps programmers check their code for rule violations. +I believe that this is extremely important: it does not just help programmers increase confidence in their unsafe code, it also helps them learn what the rules even are. +We all learn by making mistakes, but that only works if you *know* that you made a mistake -- and with unsafe code, that's very hard to figure out. +And, last but not least, creating such tooling will uncover ambiguities in the rules and raise new questions about corner cases that we forgot to consider, and thus help flesh out the rules themselves. + +## Where are we at? + +Some of the things I described above are already happening, but things are moving slowly and more hands are always needed! + +The [unsafe code guidelines effort](https://github.com/rust-rfcs/unsafe-code-guidelines/) (UCG) is slowly but steadily chipping away at the kind of questions I raised, determining whether we can have a consensus among the small group of people that is actively participating in these discussions. +This is an interesting mix of descriptive and normative work: we are trying to cater patterns that already existing unsafe code follows and cast them into rules that future code will have to follow. +Of course, anything we do still has to go through the RFC process before it becomes normative, and I am sure new interesting questions will come up then. +Right now, we are discussing ["validity invariants"](https://github.com/rust-rfcs/unsafe-code-guidelines/blob/master/active_discussion/validity.md), which describe the assumptions the compiler is allowed to make about the contents of a variable based on its type. +This just started, so now is a perfect time to join the effort! + +A [new approach for handling uninitialized data](https://github.com/rust-lang/rust/issues/53491) is available in nightly, replacing the old `mem::uninitialized` that turned out to be impossible to design reasonable rules for. +At this point, this is mostly blocked on [bikeshedding the API](https://github.com/rust-lang/rust/pull/56138) and figuring out whether we want to allow or forbid unsafe code to create references to uninitialized data (without reading from them) -- the latter being one of the most interesting open questions around "validity invariants" that the UCG is discussing. + +There is an [accepted RFC](https://github.com/rust-lang/rfcs/blob/master/text/2514-union-initialization-and-drop.md) for resolving the open questions around the interaction of `union` and `Drop`, and a [work-in-progress partial implementation](https://github.com/rust-lang/rust/pull/56440) of that RFC. + +There is an [RFC under discussion](https://github.com/rust-lang/rfcs/pull/2582) for resolving the situation around references to fields of packed structs, that also helps with other questions around references in unsafe code. +This RFC could need some pushing over the finish line, and then it needs implementing. + +In terms of tooling, there is [Miri](https://github.com/solson/miri/) (also available on the [Rust Playground](https://play.rust-lang.org/)), which could need quite some love to improve the error messages, to track more information during execution for better diagnostics, to make it run faster and to support more things that test suites often do (like panics, or seeding RNGs from the OS). + +Rust also, in principle, supports some of the LLVM sanitizers, namely asan, tsan, msan and lsan. +However, using them [still seems to run into some issues](https://github.com/google/mundane/issues/15), and the [rust-san project](https://github.com/japaric/rust-san) seems to lay dormant. +We should aim at making it standard practice for crates to run these sanitizers as part of their CI. +Even if the crate doesn't use unsafe code itself, it might have a dependency that does, so using the sanitizer here helps improve the test coverage of that dependency. +This won't catch violations of Rust-specific rules, and to my knowledge these sanitizers all have false negatives (meaning they miss bugs in their domain), but they *do* find many issues and that goes a long way. + +Of course, it would be awesome to have a Rust-specific sanitizer as well. +There is some low-hanging fruit here, like having a way to compile your program with assertions guarding unchecked slice accesses, or verifying that pointers are sufficiently aligned -- and then there is a long list of interesting checks that are increasingly hard to implement. + +And then there are all the things I did not think of that could help unsafe code authors do their work with confidence, and reduce the number of mistakes that people inevitably make when writing unsafe code. +I am sure there is a lot of possibility here for API design to improve the overall reliability of unsafe code. + +So, as you can see, many things are happening, so I think there is some real chance that we can make serious progress on this topic in 2019. +Let's make it happen, together! -- 2.39.5 From ca23ad10f06218fd7c963557e3218a8d8db0658c Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 12 Feb 2019 09:56:31 +0100 Subject: [PATCH 07/16] all-hands post --- personal/_posts/2019-02-12-all-hands-recap.md | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 personal/_posts/2019-02-12-all-hands-recap.md diff --git a/personal/_posts/2019-02-12-all-hands-recap.md b/personal/_posts/2019-02-12-all-hands-recap.md new file mode 100644 index 0000000..36a6be3 --- /dev/null +++ b/personal/_posts/2019-02-12-all-hands-recap.md @@ -0,0 +1,70 @@ +--- +title: "All-Hands 2019 Recap" +categories: rust +--- + +Last week, I was in Berlin at the Rust All-Hands 2019. +It was great! +I will miss nerding out in discussions about type theory and having every question answered by just going to the person who's the expert in that area, and asking them. +In this post, I am summarizing the progress we made in my main areas of interest and the discussions I was involved in---this is obviously just a small slice of all the things that happened. + + + +## Validity Invariants and `MaybeUninit` + +We had a session to talk about [validity invariants](https://github.com/rust-rfcs/unsafe-code-guidelines/blob/master/active_discussion/validity.md) ([meeting notes here](https://paper.dropbox.com/doc/Topic-Validity-Invariants--AXWuzG6GFwiTyUDukuPV7vDmAg-Wpl6mhrqNBStyrCHwhpYI#:uid=357396828564014720965680&h2=NOTES)). +No firm conclusions were reached, but we got input from people that haven't been part of the discussions inside the UCG (unsafe code guidelines) WG yet and that was very interesting. +It seems that deciding about the [validity invariant for references](https://github.com/rust-rfcs/unsafe-code-guidelines/issues/77) and (to a lesser extend) the [validity invariant for unions](https://github.com/rust-rfcs/unsafe-code-guidelines/issues/73) will be a slow process---there are lots of different options, and some of the trade-offs come down to prioritizing one value over another (like prioritizing checkable UB over optimizations, or vice versa). +Probably the closest to a conclusion we reached was that [uninitialized integers should probably not be UB](https://github.com/rust-rfcs/unsafe-code-guidelines/issues/71#issuecomment-460599057) until you perform any operation on them. + +That said, it also got clear that the sooner we can stabilize (parts of) [`MaybeUninit`](https://github.com/rust-lang/rust/issues/53491), the better. +And as @japaric pointed out, we do not have to wait until all of these questions get answered! +We can stabilize a subset of the API, and leave (in particular) `get_ref` and `get_mut` out of that. +I think the biggest blocker for that is to get [RFC 2582](https://github.com/rust-lang/rfcs/pull/2582) accepted (which is very close to beginning its FCP). +Once that is done, I think I'll just beef up the documentation a bit and submit a stabilization PR. + +What worries me a bit is that we did not get much feedback from people using the API. +It seems like everyone is waiting for it to become stable, but of course then it will be too late to fix API mistakes that we made! +Given the importance of the API for certain classes of unsafe code, I think it would be great to get some more feedback before we set things in stone. + +To finish up the API, I went ahead and renamed the method that you call when a `MaybeUninit` is fully initialized to [`into_initialized`](https://doc.rust-lang.org/nightly/std/mem/union.MaybeUninit.html#method.into_initialized), which seems, uh, good enough? +I also like `assume_initialized` but that does not sound like it would return anything. +One thing I wonder about is whether some of these methods should be "downgraded" to functions, forcing callers to write `MaybeUninit::into_initialized` or so. +If you have an opinion on this, please [join us in the tracking issue](https://github.com/rust-lang/rust/issues/53491)! + +## Uninitialized Memory + +@stjepang, @Amanieu and me talked a bit about [uninitialized data in atomic operations](https://github.com/crossbeam-rs/crossbeam/issues/315), and how to make sense of the [C++ spec](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange), in particular in the presence of mixed atomic and non-atomic accesses to the same object using [`atomic_ref`](https://en.cppreference.com/w/cpp/atomic/atomic_ref). +We did not make any notable progress, other than the possibility of a sound `AtomicCell` *without* `get_mut`. + +Another discussion was around using [`Read::read`](https://doc.rust-lang.org/nightly/std/io/trait.Read.html#tymethod.read) to initialize memory. +The problem here is that with an *unknown* `Read` implementation, we cannot just pass in an uninitialized buffer. +Uninitialized data is strictly different from any particular bit pattern, and using it in any non-trivial way is UB. +Since there is no way to know what the unknown `Read::read` function does, we cannot pass such bad data to it. +In [my terminology](({% post_url 2018-08-22-two-kinds-of-invariants%})), even though integers with uninitialized data are (likely) *valid*, uninitialized data violates the *safety invariant* of our integer types. +To solve this, the proposal is to add a [`freeze` method](https://github.com/rust-lang/rust/pull/58363) that turns all uninitialized memory into arbitrary but fixed bits. +Such frozen memory admits fewer optimizations (because it must observably have a consistent value when accessed multiple times), but it is also safe to use at any integer type (for the same reason).
+The main reason I like this proposal is that it officially acknowledges the subtle but important distinction between "arbitrary bit pattern" and "uninitialized", and that should help a lot with teaching these concepts to unsafe code authors. + +## Stacked Borrows + +Finally, there was some discussion about [Stacked Borrows](https://github.com/RalfJung/unsafe-code-guidelines/blob/stacked-borrows/wip/stacked-borrows.md) ([meeting notes here](https://paper.dropbox.com/doc/Topic-Stacked-borrows--AXUlwiiCbkUsrhfLWPy43~yiAg-2q57v4UM7cIkxCq9PQc22#:uid=304192866787878821395738&h2=NOTES)). +The feedback was very positive, and (to my surprise) everybody agreed that the things I had to change in the standard library to make it conform with the model were appropriate bugfixes. +@arielb1 and @matthewjesper helped a lot with detailed discussions about some of the [remaining issues](https://github.com/rust-rfcs/unsafe-code-guidelines/issues?q=is%3Aissue+is%3Aopen+label%3Atopic-stacked-borrows) in the model, as well as figuring out a plan to fix a problem [found by @Amanieu](https://github.com/solson/miri/issues/615). +However, [two-phase borrows remain a problem](https://github.com/rust-lang/rust/issues/56254). + +## Miri now tests libcore and liballoc + +I am particularly happy about the progress I made on [Miri](https://github.com/solson/miri/) during this week. +With help from @eddyb and @alexcrichton, I got Miri to run the libcore and liballoc unit test suites. +This has already helped to [uncover a bug](https://github.com/rust-lang/rust/pull/58200) and some [subtle undocumented invariants](https://github.com/rust-lang/rust/issues/58320) in the standard library, as well as [several](https://github.com/solson/miri/pull/625) bugs [in Miri](https://github.com/rust-lang/rust/pull/58324). +I have [set things up](https://github.com/RalfJung/miri-test-libstd) such that Miri will run these tests every night, giving us higher confidence that the standard library is free of undefined behavior---or rather, that the parts of the standard library that are covered by unit tests are free of undefined behavior that Miri can detect. +I've been partially working towards this goal for several months now, so it is really satisfying to see it all come together. :-) + +Moreover, @Amanieu ran Miri on hashbrown, not discovering a bug in his crate but running into [several](https://github.com/solson/miri/issues/612) bugs [in Miri](https://github.com/solson/miri/issues/614) which I then fixed. + +And last but not least, Miri can now [pass arguments to the interpreted program](https://github.com/solson/miri/pull/624), which is particularly useful when running test suites to run only the test one is currently debugging. + +I think that's it! +Lots of exciting progress as well as lots of grounds for further discussion. +This won't get boring any time soon. :D -- 2.39.5 From 968bde6239655cae47a199b516421f1b2c59b8ab Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 12 Feb 2019 09:59:25 +0100 Subject: [PATCH 08/16] add Reddit link --- personal/_posts/2019-02-12-all-hands-recap.md | 1 + 1 file changed, 1 insertion(+) diff --git a/personal/_posts/2019-02-12-all-hands-recap.md b/personal/_posts/2019-02-12-all-hands-recap.md index 36a6be3..59e01b7 100644 --- a/personal/_posts/2019-02-12-all-hands-recap.md +++ b/personal/_posts/2019-02-12-all-hands-recap.md @@ -1,6 +1,7 @@ --- title: "All-Hands 2019 Recap" categories: rust +reddit: /rust/comments/apreqi/ucgmiri_allhands_2019_recap/ --- Last week, I was in Berlin at the Rust All-Hands 2019. -- 2.39.5 From 7fad530d10920cdd874584d4111bcb046ffad34c Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 15 Feb 2019 10:10:57 +0100 Subject: [PATCH 09/16] update room number --- research/contact.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/research/contact.html b/research/contact.html index e3e0052..0424372 100644 --- a/research/contact.html +++ b/research/contact.html @@ -14,7 +14,7 @@ sort: 2

Work Address

-Room 438
+Room 336
MPI-SWS (Campus E1.5)
66123 Saarbrücken
Germany -- 2.39.5 From 0d8a88849dca11fabbde33c42fb83ebdafd5d5de Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2019 12:58:36 +0100 Subject: [PATCH 10/16] fix pin API doc links --- personal/_posts/2018-04-05-a-formal-look-at-pinning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/personal/_posts/2018-04-05-a-formal-look-at-pinning.md b/personal/_posts/2018-04-05-a-formal-look-at-pinning.md index 121de4b..72dccc1 100644 --- a/personal/_posts/2018-04-05-a-formal-look-at-pinning.md +++ b/personal/_posts/2018-04-05-a-formal-look-at-pinning.md @@ -44,7 +44,7 @@ The core piece of the pinning API is a new reference type `Pin<'a, T>` that guar Crucially, **pinning does not provide immovable types**! Data is only pinned after a `Pin` pointing to it has been created; it can be moved freely before that happens. -The [corresponding RFC](https://github.com/rust-lang/rfcs/blob/master/text/2349-pin.md) explains the entirey new API surface in quite some detail: [`Pin`](https://doc.rust-lang.org/nightly/std/mem/struct.Pin.html), [`PinBox`](https://doc.rust-lang.org/nightly/std/boxed/struct.PinBox.html) and the [`Unpin`](https://doc.rust-lang.org/nightly/std/marker/trait.Unpin.html) marker trait. +The [corresponding RFC](https://github.com/rust-lang/rfcs/blob/master/text/2349-pin.md) explains the entirey new API surface in quite some detail: [`Pin`](https://doc.rust-lang.org/1.27.0/std/mem/struct.Pin.html), [`PinBox`](https://doc.rust-lang.org/1.27.0/std/boxed/struct.PinBox.html) and the [`Unpin`](https://doc.rust-lang.org/1.27.0/std/marker/trait.Unpin.html) marker trait. I will not repeat that here but only show one example of how to use `Pin` references and exploit their guarantees: {% highlight rust %} #![feature(pin, arbitrary_self_types, optin_builtin_traits)] -- 2.39.5 From 8450dcc0578ee8f2187d05645d93066866d41310 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2019 22:44:33 +0100 Subject: [PATCH 11/16] add firejail post --- personal/_posts/2019-03-09-firejail.md | 91 ++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 personal/_posts/2019-03-09-firejail.md diff --git a/personal/_posts/2019-03-09-firejail.md b/personal/_posts/2019-03-09-firejail.md new file mode 100644 index 0000000..6f16557 --- /dev/null +++ b/personal/_posts/2019-03-09-firejail.md @@ -0,0 +1,91 @@ +--- +title: "Sandboxing All The Things with Firejail" +categories: sysadmin +--- + +Sometimes, I run software that I trust less. +All software I use on a daily basis is open-source, but there is still closed-source software I run occasionally, like video games. +Unfortunately, the usual Unix security model does not protect against such software misbehaving, as illustrated by [this XKCD](https://www.xkcd.com/1200/). +There is no good reason that these applications should have access to all my personal information and secret keys that I have stored on my system, but without proper application isolation (like we are used to on mobile devices nowadays) nothing stops them from leaking all this data. +I didn't want to wait until such technology becomes the default on Linux desktops, so I did some research for ways to sandbox existing applications and ended up with [Firejail](https://github.com/netblue30/firejail). + + + +Firejail is packaged for Debian and other distributions, so you should be able to install it with your distribution's package manager. +It runs an application in a sandbox, the details of which are configured in a *profile*. +Firejail comes with profiles for many applications. +For example, `firejail firefox` will start Firefox inside the sandbox. +This adds an extra layer of security around Firefox' security mechanisms. + +However, I have found that my setup is sufficiently non-standard that I often have to customize the profiles. +The most complex piece of the profile is to configure which parts of the file system the application will be able to access. +Unfortunately, the configuration language can be quite hard to use---it is often difficult to find out why access to a particular file does not work. +Also, the whitelisting mechanism is fairly inflexible: you cannot control which directory gets whitelisted; instead, when you whitelist something in `/home/user/foo`, whitelisting gets turned on for all of `/home` (and similar for a bunch of other hard-coded "whitelisting roots"). +But with some experimentation, you'll get things to work eventually. + +For example, if there are extra directories that no sandboxed application should be able to access, you can add them in `/etc/firejail/disable-common.local`. +I am blacklisting all my personal files: +``` +# Personal files +blacklist ${HOME}/Documents +``` +This will affect all sandboxes, because `disable-common.inc` is included pretty much everywhere, and that includes `disable-common.local`. + +## Custom Profiles + +Firejail cannot come with profiles for all applications, so sometimes you have to write your own. +For example, I use one profile for all video games as well as TeamSpeak, which is based on the profile for Steam and looks like this (I store it in `/etc/firejail/gaming.profile`): +``` +# Steam profile (applies to games/apps launched from Steam as well) +noblacklist ${HOME}/.steam +noblacklist ${HOME}/.steampath +noblacklist ${HOME}/.steampid +noblacklist ${HOME}/.local/share/steam +noblacklist ${HOME}/.wine +noblacklist ${HOME}/.ts3client +include /etc/firejail/disable-common.inc +include /etc/firejail/disable-programs.inc +include /etc/firejail/disable-passwdmgr.inc + +caps.drop all +netfilter +nonewprivs +noroot +protocol unix,inet,inet6,netlink +# wine/games don't work properly with seccomp +#seccomp + +# give access to some directories +read-write ${HOME}/.cache/winetricks +read-write ${HOME}/.ts3client +read-write ${HOME}/.steam +read-write ${HOME}/bin/teamspeak3 +whitelist /mnt/store/r/games +# -> implicitly, the rest of /mnt is blocked +``` +Obviously, you will have to adjust the paths. +If one particular application needs something that got globally blacklisted, you can use `noblacklist`. +However, you might also have to `whitelist` it if something else sets up a whitelist for this directory. +Also, there seems to be no way to have something on the `blacklist` globally but then make it `read-only` for a particular sandbox only. +(This is what I mean by the configuration language being hard to use.) + +Next, it is a good idea to test the profile. +You can start a shell inside the sandbox with: +``` +firejail --profile=/etc/firejail/gaming.profile bash +``` +Now you can do things like `ls ~/.ssh` to make sure your secret keys are not accessible in the sandbox. +Also test accessing the directories where the games are stored. +Use `exit` to leave the sandbox. + +I have created a file `~/bin/gamejail` to conveniently start an application inside the sandbox: +``` +#!/bin/bash +exec firejail --profile=/etc/firejail/gaming.profile "$@" +``` +Now you just have to wrap the call to run the application inside `gamejail`, for example `gamejail steam`. + +That's all I got. +I hope this little tutorial helps you to make your system a bit more secure by restricting application privileges. +Firejail is far from perfect, but it works quite well and it is extremely customizable. +In case you have trouble configuring the sandbox properly, I have found it very helpful to look at all the examples that are shipped in `/etc/firejail`. -- 2.39.5 From 1d494df45355a7d20ed35e71848b776c9d279210 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 10 Mar 2019 20:29:45 +0100 Subject: [PATCH 12/16] read_ref doe not need to mutate thanks to @amosonn! --- personal/_posts/2018-04-05-a-formal-look-at-pinning.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/personal/_posts/2018-04-05-a-formal-look-at-pinning.md b/personal/_posts/2018-04-05-a-formal-look-at-pinning.md index 72dccc1..5e51e7c 100644 --- a/personal/_posts/2018-04-05-a-formal-look-at-pinning.md +++ b/personal/_posts/2018-04-05-a-formal-look-at-pinning.md @@ -68,11 +68,11 @@ impl SelfReferential { fn init(mut self: Pin) { let this : &mut SelfReferential = unsafe { Pin::get_mut(&mut self) }; // Set up self_ref to point to this.data. - this.self_ref = &mut this.data as *const i32; + this.self_ref = &this.data as *const i32; } - fn read_ref(mut self: Pin) -> Option { - let this : &mut SelfReferential = unsafe { Pin::get_mut(&mut self) }; + fn read_ref(self: Pin) -> Option { + let this : &SelfReferential = &*self; // Dereference self_ref if it is non-NULL. if this.self_ref == ptr::null() { None -- 2.39.5 From eb564f575a912b04e241cdd1a284f716aabbe3fa Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 26 Mar 2019 21:07:52 +0100 Subject: [PATCH 13/16] miri on rustup --- .../2019-03-26-miri-as-rustup-component.md | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 personal/_posts/2019-03-26-miri-as-rustup-component.md diff --git a/personal/_posts/2019-03-26-miri-as-rustup-component.md b/personal/_posts/2019-03-26-miri-as-rustup-component.md new file mode 100644 index 0000000..a1ffe4f --- /dev/null +++ b/personal/_posts/2019-03-26-miri-as-rustup-component.md @@ -0,0 +1,27 @@ +--- +title: "Miri available as rustup component" +categories: rust +--- + +Running your unsafe code test suite in Miri has just gotten even easier: Miri is now available as a `rustup` component! +Huge thanks to @oli-obk and @mati865 who made this happen. + +Miri can run your test suite in a very slow, interpreted mode that enables it to test for undefined behavior: if an out-of-bounds array access happens, uninitialized memory gets used the wrong way or a dangling raw pointer gets dereferenced, Miri will notice and tell you. +However, Miri cannot execute all programs, and it also cannot detect all forms of misbehavior. +For further information, please check out [Miri's README](https://github.com/rust-lang/miri#readme). + + + +Installation (only on nightly toolchains) is now as simple as +``` +rustup component add miri +``` +After installing Miri, you can run your crate's test suite in it with `cargo miri test`. +I suggest you do `cargo clean` first because Miri needs to build its own standard library, and rustc can get confused when crates built with different standard libraries get mixed. +If you have `#[should_panic]` tests, try `cargo miri test -- -- -Zunstable-options --exclude-should-panic` because Miri currently aborts execution on a panic. + +There's a lot of work left to be done, in particular to enable more programs to execute in Miri. +Still, slowly but steadily, my [vision]({% post_url 2017-05-23-internship-starting %}) of Miri as a practical tool to test for undefined behavior is actually becoming reality: [the standard library](https://github.com/RalfJung/miri-test-libstd) and [hashbrown](https://github.com/Amanieu/hashbrown/) have their test suites running in Miri under CI. +I cannot express how glad it makes me to be able to contribute to the Rust ecosystem becoming a bit safer. + +Maybe your crate is next? -- 2.39.5 From cb3f4d1d58b9731431862bfa9f4382f5344540f9 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 26 Mar 2019 21:28:30 +0100 Subject: [PATCH 14/16] link to reddit --- personal/_posts/2019-03-26-miri-as-rustup-component.md | 1 + 1 file changed, 1 insertion(+) diff --git a/personal/_posts/2019-03-26-miri-as-rustup-component.md b/personal/_posts/2019-03-26-miri-as-rustup-component.md index a1ffe4f..0c67d2f 100644 --- a/personal/_posts/2019-03-26-miri-as-rustup-component.md +++ b/personal/_posts/2019-03-26-miri-as-rustup-component.md @@ -1,6 +1,7 @@ --- title: "Miri available as rustup component" categories: rust +reddit: /rust/comments/b5um0v/miri_available_as_rustup_component/ --- Running your unsafe code test suite in Miri has just gotten even easier: Miri is now available as a `rustup` component! -- 2.39.5 From 48530f06c08afe1197178213be5ac1156db51809 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 30 Apr 2019 21:10:35 +0200 Subject: [PATCH 15/16] Stacked Borrows 2 --- .../_posts/2019-04-30-stacked-borrows-2.md | 368 ++++++++++++++++++ personal/_sass/_base.scss | 6 + 2 files changed, 374 insertions(+) create mode 100644 personal/_posts/2019-04-30-stacked-borrows-2.md diff --git a/personal/_posts/2019-04-30-stacked-borrows-2.md b/personal/_posts/2019-04-30-stacked-borrows-2.md new file mode 100644 index 0000000..87617ea --- /dev/null +++ b/personal/_posts/2019-04-30-stacked-borrows-2.md @@ -0,0 +1,368 @@ +--- +title: "Stacked Borrows 2" +categories: rust research +--- + +Recently, I have [significantly updated](https://github.com/rust-lang/miri/pull/695) Stacked Borrows in order to fix some issues with the handling of shared references that were uncovered in the previous version. +In this post, I will describe what the new version looks like and how it differs from [Stacked Borrows 1]({% post_url 2018-11-16-stacked-borrows-implementation %}). +I assume some familiarity with the prior version and will not explain everything from scratch. + + + +## The problem + +The problem I wanted to solve was that the first version of Stacked Borrows only performed very little tracking of shared references. +My thinking was, if the location is read-only anyway, then it does not harm to grant anyone read access. +However, [as @arielby noted](https://github.com/rust-lang/unsafe-code-guidelines/issues/87), this leads to loss of optimization potential in cases where a function receives a mutable reference (which is supposed to have no aliases) and then creates a shared reference from it: +{% highlight rust %} +fn main() { + let x = &mut 0u32; + let p = x as *mut u32; + foo(x, p); +} + +fn foo(a: &mut u32, y: *mut u32) -> u32 { + *a = 1; + let _b = &*a; // This freezes `x`. Frozen locations can be read by any raw pointer. + unsafe { *y = 2; } // Hence, this legal in Stacked Borrows. + return *a; // But we want to optimize this to always return 1! +} +{% endhighlight %} +Stacked Borrows 1 allowed any raw pointer to read a frozen location. +Basically, once a location is frozen we do not keep track of which pointer is allowed to read; we just allow all pointers to read. +However, that means that `&*a` above (reborrowing a mutable reference as a shared reference) has the unintended side-effect of permitting raw pointers to access `*a`! +This violates the idea that `a` is a unique pointer: we'd like to assume that `foo` can only ever return `1`, because `y` cannot possibly alias with `a`. +Hence calling `foo` like we do here with `a` and `y` pointing to the same thing must be undefined behavior---and yet, Stacked Borrows 1 considered the example above to be legal. + +## The solution + +To fix this, I had to replace the mechanism of "freezing" by something else. +Remember that in Stacked Borrows 1, when a shared reference was created, we stored the timestamp at which that happened and also recorded in memory that the location this reference points to is frozen since now. +Whenever a shared reference gets used, we check that the location is frozen *at least* since our reference got created. +This is in contrast to mutable references, where we require the exact unique ID of that reference to be present in the borrow stack. + +To rule out cases like the example above, instead of freezing a location and allowing *all* shared reference created henceforth to access this location, we keep track precisely of *which* shared references are allowed access. +The per-location borrow stack now consists of items that grant a particular *permission* to a pointer identified by a particular *tag*: +{% highlight rust %} +pub struct Item { + /// The pointers the permission is granted to. + tag: Tag, + /// The permission this item grants. + perm: Permission, +} + +pub enum Permission { + /// Grants unique mutable access. + Unique, + /// Grants shared mutable access. + SharedReadWrite, + /// Greants shared read-only access. + SharedReadOnly, +} +{% endhighlight %} +There is no longer a separate "frozen" state; keeping shared references read-only is now handled by the borrow stack itself: +{% highlight rust %} +pub struct Stack { + borrows: Vec +} +{% endhighlight %} +The *tag* is also simpler than it was before: there are no longer separate tags for mutable and shared references. +{% highlight rust %} +pub type PtrId = NonZeroU64; +pub enum Tag { + Tagged(PtrId), + Untagged, +} +{% endhighlight %} +Just like before, a new `Tag` gets picked on every retagging; in particular whenever a reference gets created with `&mut `/`& ` and when a reference gets cast to a raw pointer. +Other than that (like on pointer arithmetic), the tag just gets propagated to keep track of where this pointer comes from. +However, unlike before, the only difference between mutable and shared references is the permissions that are associated with that tag. + +### Memory accesses + +But before we go into creation of new references, let us look at how permissions in the borrow stack affect what happens at a memory access is legal. +So assume that some pointer tagged `tag` is used to either read from or write to memory. +For each affected location, we go through two steps: first we try to find the *granting item*, then we remove *incompatible items*. + +**Finding the granting item.** +To find the granting item, we traverse the borrow stack top-to-bottom and search for an item that has the same tag. +If this is a write access, we go on looking until we find an item with the right tag whose permission is `SharedReadWrite` or `Unique`---we do not consider items with `SharedReadOnly` permission to grant a write access. +Once we found the granting item, we remember its position in the stack; that will be important for the second step. +If no granting item can be found, the access causes undefined behavior. + +For example, if the stack is +``` +[ Item { tag: Tagged(0), perm: Unique }, + Item { tag: Tagged(1), perm: SharedReadOnly} ] +``` +then a read access with tag `Tagged(1)` is granted by the item at index 1; a read/write access with tag `Tagged(0)` is granted by the item at index 0, but a write access with tag `Tagged(1)` is not granted by any item (and thus not allowed). +Just to keep things shorter and easier to read, in the following I will use a short-hand syntax for writing down items, so the above stack would look as follows: +``` +[ (0: Unique), (1: SharedReadOnly) ] +``` + +To consider a second example, if the stack is +``` +[ (0: Unique), (Untagged: SharedReadWrite), (Untagged: SharedReadOnly) ] +``` +then a read access with an `Untagged` pointer is granted by the item at index 2, but a write access with an `Untagged` pointer is only granted by the item at index 1. + +**Removing incompatible items.** +In the second step, we traverse all the items *above* the granting item in the stack, and see if they are compatible with this access. +This realizes the idea (already present in the original Stacked Borrows) that using one pointer disallows future uses of another pointer. +For example, if the current stack is +``` +[ (0: Unique), (1: Unique) ] +``` +then doing any kind of access with a `Tagged(0)` pointer should remove the `1: Unique` item from the stack. +This matches the part of Stacked Borrows 1 where items got popped off the stack until the granting item is at the top. +We say that the granting `Unique` permission is *incompatible* with the `Unique` permission of the item higher up the stack, and hence the latter must be removed. + +However, in the new model, we don't always remove *all* items that are above the granting item: +``` +[ (0: Unique), (1: SharedReadOnly), (2: SharedReadOnly) ] +``` +In a situation like this, there are two pointers that may be used for reading, `Tagged(1)` and `Tagged(2)`. +Using either of them should not have an impact on the other one, and the fact that their items are in a particular order on the stack has no impact. +In other words, a granting `SharedReadOnly` permission is *compatible* with other `SharedReadOnly` permissions, and hence when using a `SharedReadOnly` permission to grant an access, other `SharedReadOnly` permissions above it are maintained. + +Sometimes, being compatible depends on the kind of access that was performed: +in our last example stack, if we *write* with a `Tagged(0)` pointer, that should remove the `SharedReadOnly` tags because writing to a unique pointer should make sure that that pointer is at the top of the stack. +In other words, on a write, a `Unique` permission is incompatible with everything. +However, if we just *read* with a `Tagged(0)` pointer, that's fine! +This is to allow [safe code like this](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=d9aa504b1be72a0f55fb5a744cba69ff) that interleaves reads from aliasing mutable and shared references. +To enable this, on a read, a `Unique` permission is only incompatible with other `Unique` permissions. + +The following table fully documents which permissions are compatible with which other permissions on a read or write access, respectively: + +|↓ **Granting item** \ **Compatible with** → | `Unique` | `SharedReadWrite` | `SharedReadOnly` | +|---:|---|---|---| +|`Unique` | no | only reads | only reads | +|`SharedReadWrite` | no | yes | only reads | +|`SharedReadOnly` | no | no | only reads | + +So, for example, if the granting item has `SharedReadWrite` permissions, then all `Unique` above it in the stack will be removed, and moreover on a write, all `SharedReadOnly` above it will be removed. +This makes sure that the pointers for which those `SharedReadOnly` permission grants read access cannot be used any more: those pointers assumed the location was read-only, so we must make sure that after a write, they become invalid. + +The name "stack" is a bit of a misnomer at this point, because we do *not* follow a stack discipline when removing items: just because some item got removed due to incompatibility, does not mean all items above it will also get removed. +I will discuss [later][no-stack] why a strict stack discipline does not work. +But I felt it would be even more confusing to rename the entire thing at this point, and the order of elements in the "stack" still *does* matter, so I kept the name. + +In Stacked Borrows 2, there is no more action occurring when a pointer gets dereferenced: the tags only matter for the actual access. +This simplification is possible because the rules for accesses are now more complicated than before, so there is no need for extra checks on the dereference operation. +(In particular, previously the dereference checks relied on knowing the Rust type at which the access happens for proper handling of interior mutability; but now all that information is encoded using `SharedReadWrite`/`SharedReadOnly` tags as we will see. Not relying on types any more here also [fixed an annoying issue around `UnsafeCell`](https://github.com/rust-lang/miri/issues/615).) + +### Retagging: when permissions get added + +The framework of permissions in a stack with a notion of (in)compatibility that we have seen so far allows us to express some ideas like: + +* "A `Unique` pointer will get invalid when any of its parent pointers get used."
That is realized by having "no" in the entire first column of our table: every access removes all `Unique` permissions above the granting item. +* "A `SharedReadOnly` pointer will get invalid when the location gets written to."
This gets realized by having "only reads" in the entire last column of the table and not granting write accesses with a `SharedReadOnly` permission. In addition to that, however, we *also* need to ensure that in the stack we never have a `Unique` or `SharedReadWrite` *on top* of a `SharedReadOnly`! In a nonsense stack like `[ (0: SharedReadOnly), (1: SharedReadWrite) ]` we could write with a `Tagged(1)` pointer without invalidating the `Tagged(0)` pointers. + +Now the question is, how to we *use* this "language"? +We have to define which items and permissions get added to the borrow stack. +Just like before, this happens during retagging. +Before we discuss retagging in general, let us look at our motivating example and see how retagging works there in Stacked Borrows 2, and how this program is defined to cause UB: +{% highlight rust %} +fn main() { + let x = &mut 0u32; + Retag(x); // Tagged(0) + // Stack: [ (0: Unique) ] + + let p = x as *mut u32; + Retag([raw] p); // Untagged + // Stack: [ (0: Unique), (Untagged: SharedReadWrite) ] + + foo(x, p); +} + +fn foo(a: &mut u32, y: *mut u32) -> u32 { + Retag(a); // Tagged(1) + // Stack: [ (0: Unique), (1: Unique) ] + + *a = 1; + + let _b = &*a; + Retag(_b); // Tagged(2) + // Stack: [ (0: Unique), (1: Unique), (2: SharedReadOnly) ] + + // UB: y is Untagged, and there is no granting item in the stack! + unsafe { *y = 2; } + + return *a; +} +{% endhighlight %} +Initially, `x` with tag `Tagged(0)` is the only reference, and the stack says that this is the only pointer with any kind of permission. +Next, we cast `x` to a raw pointer. +The raw retagging of `p` turns `p` into an `Untagged` pointer, and adds a new item granting thusly tagged pointers `SharedReadWrite` permission. +(Really, in the MIR it will say `&mut *x as *mut u32`, so there will be an additional `Unique` permission for the temporary mutable reference, but that makes no difference and I hope [we will change that eventually](https://github.com/rust-lang/rfcs/pull/2582).) + +Then `foo` gets called, which starts with the usual retagging of all reference arguments. +`a` is originally `Tagged(0)`, and retagging a mutable reference acts like an access (just like in Stacked Borrows 1), so the first thing that happens is that all items above the granting item that are incompatible with a write access for a `Unique` permission get removed from the stack. +In our case, this means that the `Untagged: SharedReadWrite` gets removed. +Then, `a` gets the new tag `Tagged(1)` and `1: Unique` gets pushed on top of the stack. +Nothing interesting happens when writing to `a`. +When `_b` gets created, it gets assigned the new tag `Tagged(2)`, and a new item `2: SharedReadOnly` is pushed to the stack. +As we can see, shared and mutable references no longer differ in the tag they carry; the only difference is what kind of permission they get granted. +(I think this is a nice improvement from Stacked Borrows 1, where shared references had a different kind of tag. In particular, transmutes between mutable references and shared pointers no longer need any kind of special treatment.) +Finally, we come to the interesting point: the program writes to `y`. +That pointer is `Untagged`, and there is no item granting any access to such pointers, and thus the access is UB! +This is in contrast to Stacked Borrows 1, where instead of `2: SharedReadOnly` we set a special "frozen" marker on this location that would, as a side-effect, also grant untagged pointers read-only access. + +In general, during retagging, we start with some original tag used by our "parent" pointer, and we have a fresh tag `N` that will be used for the new pointer. +We have to decide which permission to grant so this tag, and where in the "stack" to insert it. +(We will not always insert new items at the top---again, as we will see following a strict stack discipline does not work.) +All of this happens for every location that the reference covers (as determined by its type). + +**Retagging a mutable reference.** +The simplest case of retagging is handling mutable references: +just like in Stacked Borrows 1, this starts by performing the actions of a write access with the parent tag. +`Unique` permissions are incompatible with anything on a write, so all items above the granting item get removed. +Then we add `N: Unique` on top of the stack to grant the new tag unique access to this location. + +This encodes the idea that mutable references must be used in a well-nested way---if you just consider mutable references, Stacked Borrows 2 still follows a stack discipline. + +**Retagging a shared reference.** +When retagging a shared reference, we have to be mindful of `UnsafeCell`. + +Outside of `UnsafeCell`, we start by performing the actions of a read access with the parent tag. +Then we push `N: SharedReadOnly` on top of the borrow stack. +This way, we grant the new pointer read-only access but make sure that it gets invalidated on the next write through any aliasing pointer (because all write-granting items are below us, and thus we test for compatibility when they get used). +There might be items left between the granting item and the one we just added, but that's okay: if any of them gets used for reading, that will be compatible with the `SharedReadOnly` according to our table above; and if any of them gets used for writing then it is important that our `SharedReadOnly` gets removed. + +When we are inside an `UnsafeCell`, we will *not* perform the actions of a memory access! +Interior mutability allows all sorts of crazy aliasing, and in particular, one can call a function with signature +{% highlight rust %} +fn aliasing(refcell: &RefCell, inner: &mut i32) +{% endhighlight %} +such that `inner` points *into* `refcell`, i.e., the two pointers overlap! +Retagging `refcell` must not remove the `Unique` item associated with `inner`. + +So, when retagging inside an `UnsafeCell`, we *find* the write-granting item for the parent's tag, and then we add `N: SharedReadWrite` just above it. +This grants write access, as is clearly needed for interior mutability. +We cannot add the new item at the top of the stack because that would add it *on top of* the item granting `inner`. +Any write to `inner` would remove our item from the stack! +Instead, we make our item sit just on top of its parent, reflecting the way one pointer got derived from the other. + +**Retagging a raw pointer.** +Retagging for raw pointers happens only immediately after a reference-to-raw-pointer cast. +Unlike in Stacked Borrows 1, retagging depends on whether this is a `*const T` or `*mut T` pointer---this is a departure from the principle that these two types are basically the same (except for variance). +I have recently learned that the borrow checker actually handles `*mut` and `*const*` casts differently; also see [this long comment](https://github.com/rust-lang/rust/issues/56604#issuecomment-477954315). +Given that the idea of Stacked Borrows is to start with what the borrow checker does and extrapolate to a dynamic model that also encompasses raw pointers, I felt that it makes sense for now to mirror this behavior in Stacked Borrows. +This is certainly not a final decision though, and I feel we should eventually have a discussion whether we should make the borrow checker and Stacked Borrows both treat `*const T` and `*mut T` the same. + +When casting to a `*mut T`, we basically behave like in the above case for inside an `UnsafeCell` behind a shared reference: we find the write-granting item for our parent's tag, and we add `Untagged: SharedReadWrite` just on top of it. +The way compatibility is defined for `SharedReadWrite`, there can be many such items next to each other on the stack, and using any one of them will not affect the others. +However, writing with a `Unique` permission further up the stack *will* invalidate all of them, reflecting the idea that when writing to a mutable reference, all raw pointers previously created from this reference become invalid. + +When casting to a `*const T`, we behave just like retagging for a shared reference `&T` (including being mindful of `UnsafeCell`). +There isn't really anything that a `*const T` can do that a shared reference cannot (both in terms of aliasing and mutation), so modeling them the same way makes sense. + +### Two-phase borrows + +Stacked Borrows 1 [had some support for two-phase borrows]({% post_url 2018-12-26-stacked-borrows-barriers %}), but some advanced forms of two-phase borrows that used to be allowed by the borrow checker [could not be handled](https://github.com/rust-lang/rust/issues/56254). +With the additional flexibility of Stacked Borrows 2 and its departure from a strict stack discipline, it is possible to accept at least the known examples of this pattern that were previously rejected: +{% highlight rust %} +fn two_phase_overlapping1() { + let mut x = vec![]; + let p = &x; + x.push(p.len()); +} +{% endhighlight %} +Previously, when the implicit reborrow of `x` in `x.push` got executed, that would remove the item for `p` from the stack. +This happens as part of the implicit write access that occurs when a mutable reference gets retagged. +With Stacked Borrows 2, we can do something else: +when retagging a two-phase mutable borrow, we do *not* perform the actions of a write access. +Instead, we just find the write-granting item for our parent's tag, and then add `N: Unique` just above it. +This has the consequence that on the first write access to this new pointer, everything on top of it will be removed, but until then the existing item for `p` remains on the stack and can be used just as before. + +Just like with Stacked Borrows 1, we then proceed by doing a *shared* reborrow of the parent's tag from `N`. +This way, the parent pointer can be used in the ways a shared reference can (including writes, if there is interior mutability) without invalidating `N`. +This is somewhat strange because we then have "parent tag - new tag - parent tag" on the stack in that order, so we no longer properly reflect the way pointers got derived from each other. +More analysis will be needed to figure out the consequences of this. + +We should also try to fully understand the effect this "weak" form of a mutable reborrow (without a virtual write access) has on the optimizations that can be performed. +Most of the cases of Stacked Borrows violations I found in the standard library are caused by the fact that even just *creating* a mutable reference asserts that it is unique and invalidates aliasing pointers, so if we could weaken that we would remove one of the most common causes of UB caused by Stacked Borrows. +On the other hand, this means that the compiler will have a harder time reordering uses of mutable references with function calls, because there are fewer cases where a mutable reference is actually assumed to be unique. + +### Barriers are dead, long live protectors + +With the departure from a strict stack discipline, I also had to re-think the concept of [barriers]({% post_url 2018-12-26-stacked-borrows-barriers %}). +The name was anyway terrible, so I replaced barriers by *protectors*: an `Item` actually consists not only of a `Tag` and a `Permission`, but also optionally of a "protector" represented as a `CallId` referencing some function call (i.e., some stack frame). +As long as that function call is running, the item with the protector may not be removed from the stack (or else we have UB). +This has pretty much the same effects as barriers did in Stacked Borrows 1. + +## Why not a strict stack discipline? +[no-stack]: #why-not-a-strict-stack-discipline + +The biggest surprise for me in designing Stacked Borrows 2 was that I was not able to enforce a strict stack discipline any more. +For retagging, that is only partially surprising; really in Stacked Borrows 1 we already added barriers below the "frozen" marker sitting next to a stack, and the `aliasing` example with the `RefCell` mentioned above only worked due to a hack that relied on reusing existing items in the middle of the stack instead of pushing a new one. +However, I had initially hoped that the rules for memory accesses would be slightly different: +my plan was that after finding the granting item, we would seek upwards through the stack and find the first incompatible item, and then remove everything starting there. +That would be a stack discipline; we would basically pop items until all items above the granting item are compatible. + +Unfortunately, that would reject many reasonable programs, such as: +{% highlight rust %} +fn test(x: &mut [i32]) -> i32 { unsafe { + let raw = x.as_mut_ptr(); // implicitly: as_mut_ptr(&mut *x) + let _val = x[1]; + return *raw; +} } +{% endhighlight %} +The issue with this example is that when calling `as_mut_ptr`, we reborrow `x`. +So after the first line the stack would look something like (using `x` as notation for `x`'s tag) +``` +[ ..., (x: Unique), (_: Unique), (Untagged: SharedReadWrite) ] +``` +In other words, there would be some `Unique` item *between* the items for `x` and `raw`. +When reading from `x` in the second line, we determine that this `Unique` tag is not compatible. +This is important; we have to ensure that any access from a parent pointer invalidates `Unique` pointers---that's what makes them unique! +However, if we follow a stack discipline, that means we have to pop the `Untagged: SharedReadWrite` *and* the `_: Unique` off the stack, making the third line UB because the raw pointer got invalidated. + +`test` seems like a perfectly reasonable function, and in fact this pattern is used in the standard library. +So in order to allow such code, accesses in Stacked Borrows 2 do *not* follow a stack discipline: +we remove all incompatible items above the granting item and ignore any interleaved compatible items. +As a consequence, line 2 in the above program removes `_: Unique` but keeps `Untagged: SharedReadWrite`, so line 3 is okay. + +However, this means we also accept the following, even if we eventually do fully precise tracking of raw pointers: +{% highlight rust %} +fn main() { unsafe { + let x = &mut 0; + let raw1 = x as *mut _; + // Stack: [ (x: Unique), (raw1: SharedReadWrite) ] + + let tmp = &mut *raw1; + let raw2 = tmp as *mut _; + // Stack: [ (x: Unique), (raw1: SharedReadWrite), (tmp: Unique), (raw2: SharedReadWrite) ] + + *raw1 = 1; // This will invalidate tmp, but not raw2. + // Stack: [ (x: Unique), (raw1: SharedReadWrite), (raw2: SharedReadWrite) ] + + let _val = *raw2; +} } +{% endhighlight %} +Naively I would assume that if we tell LLVM that `tmp` is a unique pointer, it will conclude that `raw2` cannot alias with `raw1` as that was not derived from `tmp`, and hence LLVM might conclude that `_val` must be 0. +This means the program above would have to have UB. +However, in the current framework I do not see a way to make it UB without also making the reasonable example from the beginning of this section UB. +So maybe we can find a way to express to LLVM precisely what we mean, or maybe we can only exploit some of these properties in home-grown optimizations, or maybe we find a way to have UB in the second example but not in the first. +(Like, maybe we do the stack-like behavior on writes but keep the more lenient current behavior on reads? That seems annoying to implement though. Maybe a stack is just the wrong data structure, and we should use something more tree-like.) + +## Conclusion + +Stacked Borrows 2 shares with Stacked Borrows 1 just the general structure: pointers are tagged, and there is a per-location stack indicating which tags are allowed to perform which kinds of operations on this location. +In the new version, shared references are tagged the same way as mutable references (just with an ID to distinguish multiple references pointing to the same location); the stack keeps track of which IDs are read-only (shared) pointers and which are unique (mutable) pointers. +The only operations affected by Stacked Borrows 2 are memory accesses and the `Retag` instructions explicitly represented in the MIR; there is no longer any action on a pointer dereference. +This balances the extra complexity of the new access rules (the new implementation is actually a dozen lines shorter than the old, despite long comments). +The "stack" is unfortunately not used in a completely stack-like fashion though. + +I leave it as an exercise to the reader to convince yourself that the [key properties of Stacked Borrows 1]({% post_url 2018-11-16-stacked-borrows-implementation %}#5-key-properties) still hold, and that uniqueness of mutable references is maintained even if we do a shared reborrow, as long as we keep that shared reference to ourselves. + +The new model gives some wiggle-room in both the notion of which permissions are compatible with others and where exactly which permissions are added in the "stack" in a reborrow. +We could use that wiggle-room to experiment with a bunch of closely related models to see how they affect which code gets accepted and analyze their impact on optimizations. + +I am quite excited by this new model! +It puts us into a good position to do more precise tracking for raw pointers, similar to what already happens for shared references (and something I was worried about in the original model). +That will be needed for compatibility with LLVM. +However, [there are still some known issues](https://github.com/rust-lang/unsafe-code-guidelines/issues?q=is%3Aissue+is%3Aopen+label%3Atopic-stacked-borrows), and also the fact that we do not actually use the "stack" as a stack indicates that maybe we should use a different structure there (potentially something more like a tree). +This is definitely not the final word, but I think it is a step in the right direction, and I am curious to see how it works out. +As usual, if you have any questions or comments, please join the discussion in the forums! diff --git a/personal/_sass/_base.scss b/personal/_sass/_base.scss index 7943fe3..765e459 100644 --- a/personal/_sass/_base.scss +++ b/personal/_sass/_base.scss @@ -81,3 +81,9 @@ li > p { .comment { margin-top: 1.5em; } + +table td, table th { + border: 1px solid $light-text-color; + padding: 6px 13px; + font-weight: normal; +} -- 2.39.5 From 09739e324b39b1e8a96e4e6b867b17a473f73a07 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 30 Apr 2019 21:12:55 +0200 Subject: [PATCH 16/16] add forum link --- personal/_posts/2019-04-30-stacked-borrows-2.md | 1 + 1 file changed, 1 insertion(+) diff --git a/personal/_posts/2019-04-30-stacked-borrows-2.md b/personal/_posts/2019-04-30-stacked-borrows-2.md index 87617ea..d9037c0 100644 --- a/personal/_posts/2019-04-30-stacked-borrows-2.md +++ b/personal/_posts/2019-04-30-stacked-borrows-2.md @@ -1,6 +1,7 @@ --- title: "Stacked Borrows 2" categories: rust research +forum: https://internals.rust-lang.org/t/stacked-borrows-2/9951 --- Recently, I have [significantly updated](https://github.com/rust-lang/miri/pull/695) Stacked Borrows in order to fix some issues with the handling of shared references that were uncovered in the previous version. -- 2.39.5