aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2011-12-12 12:37:21 +0000
committerAvi Kivity <avi@redhat.com>2012-03-05 14:52:38 +0200
commita355aa54f1d25dff83c0feef8863d83a76988fdb (patch)
tree4fe27e8dd1a3935757b319726a8b8bdc10496930
parent342d3db763f2621ed4546ebf8f6c61cb29d7fbdb (diff)
KVM: Add barriers to allow mmu_notifier_retry to be used locklessly
This adds an smp_wmb in kvm_mmu_notifier_invalidate_range_end() and an smp_rmb in mmu_notifier_retry() so that mmu_notifier_retry() will give the correct answer when called without kvm->mmu_lock being held. PowerPC Book3S HV KVM wants to use a bitlock per guest page rather than a single global spinlock in order to improve the scalability of updates to the guest MMU hashed page table, and so needs this. Signed-off-by: Paul Mackerras <paulus@samba.org> Acked-by: Avi Kivity <avi@redhat.com> Signed-off-by: Alexander Graf <agraf@suse.de> Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--include/linux/kvm_host.h14
-rw-r--r--virt/kvm/kvm_main.c6
2 files changed, 12 insertions, 8 deletions
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d4d4d709211..eada8e69fe5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -702,12 +702,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
if (unlikely(vcpu->kvm->mmu_notifier_count))
return 1;
/*
- * Both reads happen under the mmu_lock and both values are
- * modified under mmu_lock, so there's no need of smb_rmb()
- * here in between, otherwise mmu_notifier_count should be
- * read before mmu_notifier_seq, see
- * mmu_notifier_invalidate_range_end write side.
+ * Ensure the read of mmu_notifier_count happens before the read
+ * of mmu_notifier_seq. This interacts with the smp_wmb() in
+ * mmu_notifier_invalidate_range_end to make sure that the caller
+ * either sees the old (non-zero) value of mmu_notifier_count or
+ * the new (incremented) value of mmu_notifier_seq.
+ * PowerPC Book3s HV KVM calls this under a per-page lock
+ * rather than under kvm->mmu_lock, for scalability, so
+ * can't rely on kvm->mmu_lock to keep things ordered.
*/
+ smp_rmb();
if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
return 1;
return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64be836f334..9f32bffd37c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
* been freed.
*/
kvm->mmu_notifier_seq++;
+ smp_wmb();
/*
* The above sequence increase must be visible before the
- * below count decrease but both values are read by the kvm
- * page fault under mmu_lock spinlock so we don't need to add
- * a smb_wmb() here in between the two.
+ * below count decrease, which is ensured by the smp_wmb above
+ * in conjunction with the smp_rmb in mmu_notifier_retry().
*/
kvm->mmu_notifier_count--;
spin_unlock(&kvm->mmu_lock);