From cd6b065c44d7075763f8aa86332ca5e9ad80a035 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 6 Jun 2013 15:32:37 +0200 Subject: KVM: async_pf: Provide additional direct page notification By setting a Kconfig option, the architecture can control when guest notifications will be presented by the apf backend. There is the default batch mechanism, working as before, where the vcpu thread should pull in this information. In contrast to this, there is now a direct mechanism that will push the information to the guest. This way s390 can use an already existing architecture interface. The vcpu thread should still call check_completion to clean up leftovers. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit e0ead41a6dac09f86675ce07a66e4b253a9b7bd5) Signed-off-by: Christoffer Dall --- virt/kvm/Kconfig | 4 ++++ virt/kvm/async_pf.c | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index fbe1a48bd629..13f2d19793e3 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -22,6 +22,10 @@ config KVM_MMIO config KVM_ASYNC_PF bool +# Toggle to switch between direct notification and batch job +config KVM_ASYNC_PF_SYNC + bool + config HAVE_KVM_MSI bool diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8631d9c14320..00980ab02c45 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -28,6 +28,21 @@ #include "async_pf.h" #include +static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} +static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ +#ifndef CONFIG_KVM_ASYNC_PF_SYNC + kvm_arch_async_page_present(vcpu, work); +#endif +} + static struct kmem_cache *async_pf_cache; int kvm_async_pf_init(void) @@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work) down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); + kvm_async_page_present_sync(vcpu, apf); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); @@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; - work->addr = gfn_to_hva(vcpu->kvm, gfn); + work->addr = hva; work->arch = *arch; work->mm = current->mm; atomic_inc(&work->mm->mm_count); -- cgit v1.2.3 From 23d32b54806258d8109e8c6613d6af9958c8a9f4 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Tue, 3 Sep 2013 12:31:16 +0200 Subject: KVM: async_pf: Allow to wait for outstanding work On s390 we are not able to cancel work. Instead we will flush the work and wait for completion.
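The pair of helpers added by the direct-notification patch above implements a compile-time dispatch that can be seen in isolation with a minimal standalone sketch (illustrative only: the helper names and printf bodies are invented; only the #ifdef/#ifndef pairing mirrors the patch):

#include <stdio.h>

/* Define this to model an architecture that selects KVM_ASYNC_PF_SYNC: */
/* #define CONFIG_KVM_ASYNC_PF_SYNC 1 */

/* Exactly one of this pair compiles to a no-op, so both call sites stay
 * unconditional and the Kconfig symbol alone picks the delivery context. */
static inline void page_present_sync(int vcpu_id)
{
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
	printf("vcpu %d: notified directly from the worker\n", vcpu_id);
#endif
}

static inline void page_present_async(int vcpu_id)
{
#ifndef CONFIG_KVM_ASYNC_PF_SYNC
	printf("vcpu %d: notified later from the vcpu thread\n", vcpu_id);
#endif
}

int main(void)
{
	page_present_sync(0);	/* models the call in async_pf_execute() */
	page_present_async(0);	/* models kvm_check_async_pf_completion() */
	return 0;
}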
Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger (cherry picked from commit 9f2ceda49c6b8827c795731c204f6c2587886e2c) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 00980ab02c45..889aad022014 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); list_del(&work->queue); + +#ifdef CONFIG_KVM_ASYNC_PF_SYNC + flush_work(&work->work); +#else if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } +#endif } spin_lock(&vcpu->async_pf.lock); -- cgit v1.2.3 From f366cc9fadfc675185b24fcd0819e933a3021697 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 31 Jan 2014 14:32:46 +0100 Subject: KVM: async_pf: Add missing call for async page present Commit KVM: async_pf: Provide additional direct page notification missed the call from kvm_check_async_pf_completion to the newly introduced function. Reported-by: Paolo Bonzini Signed-off-by: Dominik Dingel Signed-off-by: Paolo Bonzini (cherry picked from commit 1179ba539541347d5427cde8bcfdaa5ead14f3aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 889aad022014..10df100c4514 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->async_pf.lock); kvm_arch_async_page_ready(vcpu, work); - kvm_arch_async_page_present(vcpu, work); + kvm_async_page_present_async(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; -- cgit v1.2.3 From 143bf65c5f01b5bb9c1d16222ea83f5977ba17a8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:57 +0100 Subject: asmlinkage, kvm: Make kvm_rebooting visible kvm_rebooting is referenced from assembler code, thus needs to be visible. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin (cherry picked from commit 52480137d82062bb8d0fb778cb9934667958e367) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 03a0381b1cb7..b5ec7fb986f6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); -bool kvm_rebooting; +__visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; -- cgit v1.2.3 From 545c8a069e3296878fab382bdc7cc66751a5759b Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 26 Feb 2014 16:14:18 +0100 Subject: KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop Use the arch-specific function kvm_arch_vcpu_runnable() to add a further criterion to identify a suitable vcpu to yield to during undirected yield processing.
Signed-off-by: Michael Mueller Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 98f4a14676127397c54cab7d6119537ed4d113a2) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b5ec7fb986f6..716f70082529 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1804,7 +1804,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (vcpu == me) continue; - if (waitqueue_active(&vcpu->wq)) + if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; -- cgit v1.2.3 From 52f0b0884cd675959f227fca8338903629f17740 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Thu, 10 Apr 2014 13:14:32 +0100 Subject: KVM: ARM: vgic: Fix sgi dispatch problem When dispatching an SGI with mode == 0, the vcpu should send the SGI only to the CPUs in the target_cpus list. A "break" must therefore be added to the case 0 branch so that execution does not fall through to case 1. Cc: # 3.10+ Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8ca405cd7c1a..26954a7d9b03 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; -- cgit v1.2.3 From fe2e974b744f9b9c1c8b0bebc7a8eba3d7d0e5df Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:26:01 +0200 Subject: KVM: async_pf: mm->mm_users can not pin apf->mm get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc was already called (or is in progress); mm->mm_count can only pin mm->pgd and the mm_struct itself. Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users. kvm_create_vm/kvm_destroy_vm play with ->mm_count too, but this case looks fine at first glance; it seems that this ->mm is only used to verify that current->mm == kvm->mm.
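The pinning rule this patch relies on can be modeled with a small standalone sketch (a toy, not kernel code; the struct and helper names are invented): mm_users keeps the address space alive, while mm_count keeps only the mm_struct allocation.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model: mm_users pins the address space (page tables, VMAs), so
 * get_user_pages() is only legal while it is non-zero; mm_count pins
 * just the mm_struct allocation and cannot keep the mappings alive. */
struct toy_mm {
	int mm_users;
	int mm_count;
	int has_mappings;
};

static void toy_mmput(struct toy_mm *mm)
{
	if (--mm->mm_users == 0)
		mm->has_mappings = 0;	/* models exit_mmap() running here */
}

static void toy_mmdrop(struct toy_mm *mm)
{
	if (--mm->mm_count == 0)
		free(mm);		/* the mm_struct itself goes away */
}

int main(void)
{
	struct toy_mm *mm = calloc(1, sizeof(*mm));

	mm->mm_users = 1;
	mm->mm_count = 1;
	mm->has_mappings = 1;

	mm->mm_count++;			/* the old apf code: mm_count only */
	toy_mmput(mm);			/* last user exits... */
	assert(!mm->has_mappings);	/* ...so gup() here would be wrong */
	toy_mmdrop(mm);			/* apf's reference: struct still alive */
	printf("mm_count kept the struct alive, not the mappings\n");
	toy_mmdrop(mm);			/* final reference: now freed */
	return 0;
}

After the last toy_mmput() the mappings are gone even though the struct is still pinned, which is exactly the window in which the old code's get_user_pages() was wrong.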
Signed-off-by: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini (cherry picked from commit 41c22f626254b9dc0376928cae009e73d1b6a49a) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 10df100c4514..06e6401d6ef4 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) flush_work(&work->work); #else if (cancel_work_sync(&work->work)) { - mmdrop(work->mm); + mmput(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); } @@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->addr = hva; work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmput(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } -- cgit v1.2.3 From ff6f383357fd59f72e2db12458db70eb2d72e0cd Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 Apr 2014 00:07:18 +0200 Subject: KVM: arm/arm64: vgic: fix GICD_ICFGR register accesses Since KVM internally represents the ICFGR registers by stuffing two of them into one word, the offset for accessing the internal representation and the one for the MMIO based access are different. So keep the original offset around, but adjust the internal array offset by one bit. Reported-by: Haibin Wang Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit f2ae85b2ab3776b9e4e42e5b6fa090f40d396794) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 26954a7d9b03..2f8aee59a8fe 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, u32 val; u32 *reg; - offset >>= 1; reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset); + vcpu->vcpu_id, offset >> 1); - if (offset & 2) + if (offset & 4) val = *reg >> 16; else val = *reg & 0xffff; @@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 4) { + if (offset < 8) { *reg = ~0U; /* Force PPIs/SGIs to 1 */ return false; } val = vgic_cfg_compress(val); - if (offset & 2) { + if (offset & 4) { *reg &= 0xffff; *reg |= val << 16; } else { -- cgit v1.2.3 From 5c3e6d772feb69cc13c6cb0359a854a1d8f8791a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Apr 2014 15:25:58 +0200 Subject: KVM: async_pf: kill the unnecessary use_mm/unuse_mm async_pf_execute() async_pf_execute() has no reason to adopt apf->mm; gup(current, mm) should work just fine even if current has another or NULL ->mm.
Recently kvm_async_page_present_sync() was added inside the "use_mm" section, but it seems that it doesn't need current->mm either. Signed-off-by: Oleg Nesterov Reviewed-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit d72d946d0b649b79709b99b9d5cb7269fff8afaa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 06e6401d6ef4..cda703e512d3 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - use_mm(mm); down_read(&mm->mmap_sem); get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); - unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); -- cgit v1.2.3 From a96fd4326a9952dac624c4a0539859c5d714cf0d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Apr 2014 17:03:00 +0200 Subject: KVM: async_pf: change async_pf_execute() to use get_user_pages(tsk => NULL) async_pf_execute() passes tsk == current to gup(); this doesn't hurt, but it is unnecessary and misleading. "tsk" is only used to account the number of faults, and current here is a random workqueue thread. Signed-off-by: Oleg Nesterov Suggested-by: Andrea Arcangeli Signed-off-by: Paolo Bonzini (cherry picked from commit e9545b9f8aeb63e05818e4b3250057260bc072aa) Signed-off-by: Christoffer Dall --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index cda703e512d3..d6a3d0993d88 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -81,7 +81,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); + get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); kvm_async_page_present_sync(vcpu, apf); -- cgit v1.2.3 From b2e06f6e9514288ce818a22fcb290741370aeba3 Mon Sep 17 00:00:00 2001 From: Haibin Wang Date: Tue, 29 Apr 2014 14:49:17 +0800 Subject: KVM: ARM: vgic: Fix the overlap check action about setting the GICD & GICC base address. Currently the check below in vgic_ioaddr_overlap will always succeed, because the vgic dist base and vgic cpu base are still UNDEF after initialization, so the following code returns early every time: if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) return 0; Therefore, before invoking vgic_ioaddr_overlap, the corresponding base address must be set first.
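The ordering bug described above reduces to a small standalone sketch (illustrative only; the sentinel value, region size, and helper names are invented):

#include <assert.h>
#include <stdint.h>

#define ADDR_UNDEF UINT64_MAX	/* stands in for VGIC_ADDR_UNDEF */

static uint64_t dist_base = ADDR_UNDEF;
static uint64_t cpu_base  = ADDR_UNDEF;

/* Mirrors vgic_ioaddr_overlap(): it trivially passes while either base
 * is still undefined, which is why checking before assignment never
 * catches anything. */
static int overlap(void)
{
	if (dist_base == ADDR_UNDEF || cpu_base == ADDR_UNDEF)
		return 0;
	return (dist_base < cpu_base + 0x1000 &&
		cpu_base < dist_base + 0x1000) ? -1 : 0;
}

static int assign(uint64_t *base, uint64_t addr)
{
	*base = addr;			/* assign first... */
	if (overlap()) {		/* ...then validate... */
		*base = ADDR_UNDEF;	/* ...and roll back on failure */
		return -1;
	}
	return 0;
}

int main(void)
{
	assert(assign(&dist_base, 0x10000) == 0);
	assert(assign(&cpu_base, 0x10800) == -1);	/* overlaps: rejected */
	assert(cpu_base == ADDR_UNDEF);
	return 0;
}

Assigning first and rolling back on failure is what the hunk below implements for the real dist/cpu base addresses.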
Signed-off-by: Haibin Wang Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 30c2117085bc4e05d091cee6eba79f069b41a9cd) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2f8aee59a8fe..4dc45e27f10b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, if (addr + size < addr) return -EINVAL; + *ioaddr = addr; ret = vgic_ioaddr_overlap(kvm); if (ret) - return ret; - *ioaddr = addr; + *ioaddr = VGIC_ADDR_UNDEF; + return ret; } -- cgit v1.2.3 From ea1725ede5e06a06cd0d5cc88cfba569fd80e24d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 16 Jan 2014 13:44:20 +0100 Subject: kvm/irqchip: Speed up KVM_SET_GSI_ROUTING When starting lots of dataplane devices the bootup takes very long on Christian's s390 with irqfd patches. With larger setups he is even able to trigger some timeouts in some components. Turns out that the KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec) when having multiple CPUs. This is caused by the synchronize_rcu and the HZ=100 of s390. By changing the code to use a private srcu we can speed things up. This patch reduces the boot time till mounting root from 8 to 2 seconds on my s390 guest with 100 disks. Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu are fine because they do not have lockdep checks (hlist_for_each_entry_rcu uses rcu_dereference_raw rather than rcu_dereference, and write-sides do not do rcu lockdep at all). Note that we're hardly relying on the "sleepable" part of srcu. We just want SRCU's faster detection of grace periods. Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS and RR. The difference between results "before" and "after" the patch has mean -0.2% and standard deviation 0.6%. Using a paired t-test on the data points says that there is a 2.5% probability that the patch is the cause of the performance difference (rather than a random fluctuation). (Restricting the t-test to RR, which is the most likely to be affected, changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8% probability that the numbers actually say something about the patch. The probability increases mostly because there are fewer data points). Cc: Marcelo Tosatti Cc: Michael S. 
Tsirkin Tested-by: Christian Borntraeger # s390 Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 25 +++++++++++++++---------- virt/kvm/irq_comm.c | 17 +++++++++-------- virt/kvm/irqchip.c | 31 ++++++++++++++++--------------- virt/kvm/kvm_main.c | 16 ++++++++++------ 4 files changed, 50 insertions(+), 39 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index abe4d6043b36..41d06db53c9c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "iodev.h" @@ -118,19 +119,22 @@ static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct _irqfd_resampler *resampler; + struct kvm *kvm; struct _irqfd *irqfd; + int idx; resampler = container_of(kian, struct _irqfd_resampler, notifier); + kvm = resampler->kvm; - kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); - rcu_read_lock(); + idx = srcu_read_lock(&kvm->irq_srcu); list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) eventfd_signal(irqfd->resamplefd, 1); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } static void @@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del(&resampler->link); @@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry *irq; struct kvm *kvm = irqfd->kvm; + int idx; if (flags & POLLIN) { - rcu_read_lock(); - irq = rcu_dereference(irqfd->irq_entry); + idx = srcu_read_lock(&kvm->irq_srcu); + irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); /* An event has been signaled, inject an interrupt */ if (irq) kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } if (flags & POLLHUP) { @@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_rcu done by caller + * It is paired with synchronize_srcu done by caller * of that function. */ rcu_assign_pointer(irqfd->irq_entry, NULL); @@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Change irq_routing and irqfd. - * Caller must invoke synchronize_rcu afterwards. + * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. 
*/ void kvm_irq_routing_update(struct kvm *kvm, struct kvm_irq_routing_table *irq_rt) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e2e6b4473a96..ced4a542a031 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; + int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * Since there's no easy way to do this, we only support injecting MSI * which is limited to 1:1 GSI mapping. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) { if (likely(e->type == KVM_IRQ_ROUTING_MSI)) @@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) ret = -EWOULDBLOCK; break; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, mutex_lock(&kvm->irq_lock); hlist_del_rcu(&kimn->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); } void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask) { struct kvm_irq_mask_notifier *kimn; - int gsi; + int idx, gsi; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) kimn->func(kimn, mask); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 20dc9e4a8f6c..b43c275775cd 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include "irq.h" @@ -33,19 +34,19 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) { - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return true; } - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); return false; } @@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; - int gsi; + int gsi, idx; trace_kvm_ack_irq(irqchip, pin); - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); } void kvm_register_irq_ack_notifier(struct kvm 
*kvm, @@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu(&kvm->irq_srcu); #ifdef __KVM_HAVE_IOAPIC kvm_vcpu_request_scan_ioapic(kvm); #endif @@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; + int ret = -1, i = 0, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, &irq_rt->map[irq], link) irq_set[i++] = *e; - rcu_read_unlock(); + srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { int r; @@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm, new); mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; r = 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 716f70082529..0b76d19ae1c5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type) r = kvm_arch_init_vm(kvm, type); if (r) - goto out_err_nodisable; + goto out_err_no_disable; r = hardware_enable_all(); if (r) - goto out_err_nodisable; + goto out_err_no_disable; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); @@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type) r = -ENOMEM; kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) - goto out_err_nosrcu; + goto out_err_no_srcu; kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) - goto out_err_nosrcu; + goto out_err_no_srcu; + if (init_srcu_struct(&kvm->irq_srcu)) + goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); @@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type) return kvm; out_err: + cleanup_srcu_struct(&kvm->irq_srcu); +out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); -out_err_nosrcu: +out_err_no_srcu: hardware_disable_all(); -out_err_nodisable: +out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); kfree(kvm->memslots); -- cgit v1.2.3 From b8e014350e67355d1f0ed9a5218f775ce90fee37 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Jun 2014 13:44:17 +0200 Subject: KVM: add missing cleanup_srcu_struct Reported-by: hrg Signed-off-by: Paolo Bonzini (cherry picked from commit 820b3fcdeb80d30410f4427d2cbf9161c35fdeef) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b76d19ae1c5..bf6cc062740d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -608,6 +608,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); kvm_free_physmem(kvm); + cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); hardware_disable_all(); -- cgit v1.2.3 From 
5eb5fbe2dd286ee93e8687ec155c54027332e2e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 May 2013 10:20:36 +0100 Subject: KVM: arm/arm64: vgic: move GICv2 registers to their own structure In order to make way for the GICv3 registers, move the v2-specific registers to their own structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit eede821dbfd58df89edb072da64e006321eaef58) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 56 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 4dc45e27f10b..85c501a16d71 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) { clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } @@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) u32 *lr; for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_lr[i]; + lr = &vgic_cpu->vgic_v2.vgic_lr[i]; irq = LR_IRQID(*lr); source_cpu = LR_CPUID(*lr); @@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { vgic_retire_lr(lr, irq, vgic_cpu); @@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Do we have an active interrupt for the same CPUID? 
*/ if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_lr[lr]); + lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; return true; } @@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; return true; } @@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; } else { - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); - if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. */ int lr, irq; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; /* Any additional pending interrupt? */ if (vgic_dist_irq_is_pending(vcpu, irq)) { @@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. */ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; + set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } - if (vgic_cpu->vgic_misr & GICH_MISR_U) - vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; } @@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) level_pending = vgic_process_maintenance(vcpu); /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { int irq; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; BUG_ON(irq >= VGIC_NR_IRQS); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... 
*/ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); @@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * points to their reset values. Anything else resets to zero * anyway. */ - vgic_cpu->vgic_vmcr = 0; + vgic_cpu->vgic_v2.vgic_vmcr = 0; vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ return 0; } @@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_vmcr & mask)) + if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) updated = true; - vgic_cpu->vgic_vmcr &= ~mask; - vgic_cpu->vgic_vmcr |= reg; + vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; + vgic_cpu->vgic_v2.vgic_vmcr |= reg; } return updated; } -- cgit v1.2.3 From 22eaf3ea5538c19657f99472d40ec2cc142dec04 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 3 Jun 2013 15:55:02 +0100 Subject: KVM: ARM: vgic: introduce vgic_ops and LR manipulation primitives In order to split the various register manipulation from the main vgic code, introduce a vgic_ops structure, and start by abstracting the LR manipulation code with a couple of accessors. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d5c6b06a5d5f8ebcf40558e566781d572920740) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 162 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 110 insertions(+), 52 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 85c501a16d71..680e3bbab61f 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -94,9 +94,12 @@ static struct device_node *vgic_node; #define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; @@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) -#define LR_IRQID(lr) \ - ((lr) & GICH_LR_VIRTUALID) - -static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) -{ - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE; - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; -} - /** * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs @@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int vcpu_id = vcpu->vcpu_id; - int i, irq, source_cpu; - u32 *lr; + int i; 
for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - lr = &vgic_cpu->vgic_v2.vgic_lr[i]; - irq = LR_IRQID(*lr); - source_cpu = LR_CPUID(*lr); + struct vgic_lr lr = vgic_get_lr(vcpu, i); /* * There are three options for the state bits: @@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * If the LR holds only an active interrupt (not pending) then * just leave it alone. */ - if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) continue; /* @@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set(vcpu, irq); - if (irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; - *lr &= ~GICH_LR_PENDING_BIT; + vgic_dist_irq_set(vcpu, lr.irq); + if (lr.irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + lr.state &= ~LR_STATE_PENDING; + vgic_set_lr(vcpu, i, lr); /* * If there's no state left on the LR (it could still be * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(*lr & GICH_LR_STATE)) - vgic_retire_lr(i, irq, vgic_cpu); + if (!(lr.state & LR_STATE_MASK)) + vgic_retire_lr(i, lr.irq, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm) } } -#define MK_LR_PEND(src, irq) \ - (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static const struct vgic_ops vgic_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, +}; + +static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + return vgic_ops.get_lr(vcpu, lr); +} + +static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.set_lr(vcpu, lr, vlr); +} + +static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + + vlr.state = 0; + vgic_set_lr(vcpu, lr_nr, vlr); + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} /* * An interrupt may have been disabled after being made pending on the @@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int lr; for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { - int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, irq)) 
{ - vgic_retire_lr(lr, irq, vgic_cpu); - if (vgic_irq_is_active(vcpu, irq)) - vgic_irq_clear_active(vcpu, irq); + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { + vgic_retire_lr(lr, vlr.irq, vcpu); + if (vgic_irq_is_active(vcpu, vlr.irq)) + vgic_irq_clear_active(vcpu, vlr.irq); } } } @@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_lr vlr; int lr; /* Sanitize the input... */ @@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr = vgic_cpu->vgic_irq_lr_map[irq]; /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY && - (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) { - kvm_debug("LR%d piggyback for IRQ%d %x\n", - lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT; - return true; + if (lr != LR_EMPTY) { + vlr = vgic_get_lr(vcpu, lr); + if (vlr.source == sgi_source_id) { + kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vlr.state |= LR_STATE_PENDING; + vgic_set_lr(vcpu, lr, vlr); + return true; + } } /* Try to use another LR for this interrupt */ @@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); + vlr.irq = irq; + vlr.source = sgi_source_id; + vlr.state = LR_STATE_PENDING; if (!vgic_irq_is_edge(vcpu, irq)) - vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI; + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr, vlr); return true; } @@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. */ - int lr, irq; + int lr; for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, vgic_cpu->nr_lr) { - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, irq); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI; + vgic_irq_clear_active(vcpu, vlr.irq); + WARN_ON(vlr.state & LR_STATE_MASK); + vlr.state = 0; + vgic_set_lr(vcpu, lr, vlr); /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, irq)) { - vgic_cpu_irq_set(vcpu, irq); + if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { - vgic_cpu_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); } /* @@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * been marked as empty. 
*/ set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); - vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Clear mappings for empty LRs */ for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, vgic_cpu->nr_lr) { - int irq; + struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) continue; - irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID; + vlr = vgic_get_lr(vcpu, lr); - BUG_ON(irq >= VGIC_NR_IRQS); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + BUG_ON(vlr.irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ -- cgit v1.2.3 From 979ff618166563c8ad6039dffeb9f5443a4ac63d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:29:39 +0100 Subject: KVM: ARM: vgic: abstract access to the ELRSR bitmap Move the GICH_ELRSR access to its own functions, and add them to the vgic_ops structure. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 680e3bbab61f..05e655e6fed9 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; } +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops.set_lr(vcpu, lr, vlr); } +static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr vlr) +{ + vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); +} + +static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_elrsr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Despite being EOIed, the LR may not have * been marked as empty. 
*/ - set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); } } @@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr; + unsigned long *elrsr_ptr; int lr, pending; bool level_pending; level_pending = vgic_process_maintenance(vcpu); + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr, - vgic_cpu->nr_lr); + pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); if (level_pending || pending < vgic_cpu->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } -- cgit v1.2.3 From b6ddec4aee27dcb4b77d6d3c136ae6633e854d4c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 10:33:43 +0100 Subject: KVM: ARM: vgic: abstract EISR bitmap access Move the GICH_EISR access to its own function. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8d6a0313c125c3c7b208b75695fe6ab00afab4c5) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 05e655e6fed9..dfbbe6b7be35 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) return val; } +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) return vgic_ops.get_elrsr(vcpu); } +static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_eisr(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * Some level interrupts have been EOIed. Clear their * active bit. 
*/ + u64 eisr = vgic_get_eisr(vcpu); + unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr, - vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); -- cgit v1.2.3 From 1524b3b98345f77a919b73b09116ab1852f140b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:02:10 +0100 Subject: KVM: ARM: vgic: abstract MISR decoding Instead of directly dealing with the GICH_MISR bits, move the code to its own function and use a couple of public flags to represent the actual state. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 495dd859f304689a7cd5ef413c439cb090dc25e6) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index dfbbe6b7be35..023f97e5e5da 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return val; } +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops.get_eisr(vcpu); } +static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) +{ + return vgic_ops.get_interrupt_status(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1282,11 +1301,12 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; - kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr); + kvm_debug("STATUS = %08x\n", status); - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) { + if (status & INT_STATUS_EOI) { /* * Some level interrupts have been EOIed. Clear their * active bit. @@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } } - if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U) + if (status & INT_STATUS_UNDERFLOW) vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; return level_pending; -- cgit v1.2.3 From 0236bc25e086d4aaf89c486aab94597382984e10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:24:17 +0100 Subject: KVM: ARM: vgic: move underflow handling to vgic_ops Move the code dealing with LR underflow handling to its own functions, and make them accessible through vgic_ops. 
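The indirection this series keeps extending is the classic const-ops-table pattern; a minimal standalone sketch follows (toy names and register layout; the real vgic_ops carries more hooks than shown):

#include <stdio.h>

/* Toy ops table: generic code calls through const function pointers so
 * another backend (e.g. a future GICv3 one) can be slotted in without
 * touching the callers. */
struct toy_vgic_ops {
	void (*enable_underflow)(unsigned int *hcr);
	void (*disable_underflow)(unsigned int *hcr);
};

#define TOY_HCR_UIE (1u << 1)	/* illustrative bit, not the hardware's */

static void v2_enable_underflow(unsigned int *hcr)  { *hcr |= TOY_HCR_UIE; }
static void v2_disable_underflow(unsigned int *hcr) { *hcr &= ~TOY_HCR_UIE; }

static const struct toy_vgic_ops toy_ops = {
	.enable_underflow  = v2_enable_underflow,
	.disable_underflow = v2_disable_underflow,
};

int main(void)
{
	unsigned int hcr = 0;

	toy_ops.enable_underflow(&hcr);		/* caller has no v2 knowledge */
	printf("hcr = %#x\n", hcr);
	toy_ops.disable_underflow(&hcr);
	printf("hcr = %#x\n", hcr);
	return 0;
}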
Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 909d9b5025f149af6cfc304a76ad6218e6622cc0) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 023f97e5e5da..b5cd797acf1c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) return ret; } +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = { .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) return vgic_ops.get_interrupt_status(vcpu); } +static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable_underflow(vcpu); +} + +static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) +{ + vgic_ops.disable_underflow(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) epilog: if (overflow) { - vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE; + vgic_enable_underflow(vcpu); } else { - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); /* * We're about to run this VCPU, and we've consumed * everything the distributor had in store for @@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } if (status & INT_STATUS_UNDERFLOW) - vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; + vgic_disable_underflow(vcpu); return level_pending; } -- cgit v1.2.3 From 36be06ffefd9ebcfcdf9bba1ca305242f586e5f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 17:48:10 +0000 Subject: KVM: ARM: vgic: abstract VMCR access Instead of directly messing with the GICH_VMCR bits for the CPU interface save/restore code, add accessors that encode/decode the entire set of registers exposed by VMCR. Not the most efficient thing, but given that this code is only used by the save/restore code, performance is far from being critical.
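The encode/decode approach can be sketched standalone (the field layout below is invented for illustration; the real shifts and masks are the GICH_VMCR_* definitions in the GIC header):

#include <assert.h>
#include <stdint.h>

/* Toy field layout, two fields packed into one 32-bit word. */
#define F_CTLR_SHIFT 0
#define F_CTLR_MASK  (0x1fu << F_CTLR_SHIFT)
#define F_PMR_SHIFT  27
#define F_PMR_MASK   (0x1fu << F_PMR_SHIFT)

struct toy_vmcr { uint32_t ctlr, pmr; };

static void unpack(uint32_t raw, struct toy_vmcr *v)
{
	v->ctlr = (raw & F_CTLR_MASK) >> F_CTLR_SHIFT;
	v->pmr  = (raw & F_PMR_MASK) >> F_PMR_SHIFT;
}

static uint32_t pack(const struct toy_vmcr *v)
{
	return ((v->ctlr << F_CTLR_SHIFT) & F_CTLR_MASK) |
	       ((v->pmr << F_PMR_SHIFT) & F_PMR_MASK);
}

int main(void)
{
	struct toy_vmcr v = { .ctlr = 0x11, .pmr = 0x1f };
	struct toy_vmcr out;

	unpack(pack(&v), &out);
	assert(out.ctlr == v.ctlr && out.pmr == v.pmr);
	return 0;
}

The round-trip property (unpack(pack(v)) == v) is all the save/restore path needs, which is why the commit message can afford to be relaxed about efficiency.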
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 69 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b5cd797acf1c..7d6bd612e799 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u32 vgic_nr_lr; +static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +static u32 vgic_nr_lr; static unsigned int vgic_maint_irq; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, @@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; } +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = { .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) vgic_ops.disable_underflow(vcpu); } +static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.get_vmcr(vcpu, vmcr); +} + +static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + vgic_ops.set_vmcr(vcpu, vmcr); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u32 reg, mask = 0, shift = 0; bool updated = false; + struct vgic_vmcr vmcr; + u32 *vmcr_field; + u32 reg; + + vgic_get_vmcr(vcpu, &vmcr); switch (offset & ~0x3) { case GIC_CPU_CTRL: - mask = GICH_VMCR_CTRL_MASK; - shift = GICH_VMCR_CTRL_SHIFT; + vmcr_field = &vmcr.ctlr; 
break; case GIC_CPU_PRIMASK: - mask = GICH_VMCR_PRIMASK_MASK; - shift = GICH_VMCR_PRIMASK_SHIFT; + vmcr_field = &vmcr.pmr; break; case GIC_CPU_BINPOINT: - mask = GICH_VMCR_BINPOINT_MASK; - shift = GICH_VMCR_BINPOINT_SHIFT; + vmcr_field = &vmcr.bpr; break; case GIC_CPU_ALIAS_BINPOINT: - mask = GICH_VMCR_ALIAS_BINPOINT_MASK; - shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcr_field = &vmcr.abpr; break; + default: + BUG(); } if (!mmio->is_write) { - reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift; + reg = *vmcr_field; mmio_data_write(mmio, ~0, reg); } else { reg = mmio_data_read(mmio, ~0); - reg = (reg << shift) & mask; - if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask)) + if (reg != *vmcr_field) { + *vmcr_field = reg; + vgic_set_vmcr(vcpu, &vmcr); updated = true; - vgic_cpu->vgic_v2.vgic_vmcr &= ~mask; - vgic_cpu->vgic_v2.vgic_vmcr |= reg; + } } return updated; } -- cgit v1.2.3 From 4f0d623b2c3e9613879134a331cb4e84e46aad04 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Jun 2013 11:36:38 +0100 Subject: KVM: ARM: vgic: introduce vgic_enable Move the code dealing with enabling the VGIC on to vgic_ops. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit da8dafd1777cdd93091207952297d221a88e6479) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7d6bd612e799..02a0bd53eef9 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + static const struct vgic_ops vgic_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, @@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = { .disable_underflow = vgic_v2_disable_underflow, .get_vmcr = vgic_v2_get_vmcr, .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, }; static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) @@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) vgic_ops.set_vmcr(vcpu, vmcr); } +static inline void vgic_enable(struct kvm_vcpu *vcpu) +{ + vgic_ops.enable(vcpu); +} + static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_cpu->vgic_v2.vgic_vmcr = 0; - vgic_cpu->nr_lr = vgic_nr_lr; - vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + vgic_enable(vcpu); return 0; } -- cgit v1.2.3 From b1dc4c705922f17d929d66acafecfdb0f33239df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2013 19:17:28 +0100 Subject: KVM: ARM: introduce vgic_params structure Move all the data specific to a given GIC implementation into its own little structure. 
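
[Illustrative sketch, not part of the patch: the shape of the consolidation --
one structure instead of four file-scope globals. The real struct vgic_params
is declared in a header outside the 'virt' filter of this log; the fields
shown are the ones this diff actually touches.]

    #include <stdint.h>

    struct vgic_params_sketch {
            unsigned int maint_irq;   /* vgic maintenance interrupt */
            uint32_t nr_lr;           /* number of list registers */
            void *vctrl_base;         /* virtual control interface mapping */
            uint64_t vcpu_base;       /* physical address of the vcpu interface */
    };

    /* One instance replaces the former globals vgic_maint_irq, vgic_nr_lr,
     * vgic_vctrl_base and vgic_vcpu_base. */
    static struct vgic_params_sketch vgic;
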
Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit ca85f623e37d096206e092ef037a145a60fa7f85) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 70 +++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 02a0bd53eef9..2a35cdbf343c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -76,14 +76,6 @@ #define IMPLEMENTER_ARM 0x43b #define GICC_ARCH_VERSION_V2 0x2 -/* Physical address of vgic virtual cpu interface */ -static phys_addr_t vgic_vcpu_base; - -/* Virtual control interface base address */ -static void __iomem *vgic_vctrl_base; - -static struct device_node *vgic_node; - #define ACCESS_READ_VALUE (1 << 0) #define ACCESS_READ_RAZ (0 << 0) #define ACCESS_READ_MASK(x) ((x) & (1 << 0)) @@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static u32 vgic_nr_lr; -static unsigned int vgic_maint_irq; +static struct vgic_params vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic_cpu->nr_lr); - if (lr >= vgic_cpu->nr_lr) + vgic.nr_lr); + if (lr >= vgic.nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1377,7 +1368,6 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; u32 status = vgic_get_interrupt_status(vcpu); bool level_pending = false; @@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... 
*/ - pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr); - if (level_pending || pending < vgic_cpu->nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); + if (level_pending || pending < vgic.nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; } - vgic_cpu->nr_lr = vgic_nr_lr; + /* + * Store the number of LRs per vcpu, so we don't have to go + * all the way to the distributor structure to find out. Only + * assembly code should use this one. + */ + vgic_cpu->nr_lr = vgic.nr_lr; vgic_enable(vcpu); @@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic_maint_irq, 0); + enable_percpu_irq(vgic.maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic_maint_irq); + disable_percpu_irq(vgic.maint_irq); break; } @@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void) int ret; struct resource vctrl_res; struct resource vcpu_res; + struct device_node *vgic_node; vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); if (!vgic_node) { @@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void) return -ENODEV; } - vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic_maint_irq) { + vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic.maint_irq) { kvm_err("error getting vgic maintenance irq from DT\n"); ret = -ENXIO; goto out; } - ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); goto out; } @@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - vgic_vctrl_base = of_iomap(vgic_node, 2); - if (!vgic_vctrl_base) { + vgic.vctrl_base = of_iomap(vgic_node, 2); + if (!vgic.vctrl_base) { kvm_err("Cannot ioremap VCTRL\n"); ret = -ENOMEM; goto out_free_irq; } - vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); - vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); + vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - ret = create_hyp_io_mappings(vgic_vctrl_base, - vgic_vctrl_base + resource_size(&vctrl_res), + ret = create_hyp_io_mappings(vgic.vctrl_base, + vgic.vctrl_base + resource_size(&vctrl_res), vctrl_res.start); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); @@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void) } kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); + vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { @@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void) ret = -ENXIO; goto out_unmap; } - vgic_vcpu_base = vcpu_res.start; + vgic.vcpu_base = vcpu_res.start; goto out; out_unmap: - iounmap(vgic_vctrl_base); + iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); + free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); out: of_node_put(vgic_node); return ret; @@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic_vcpu_base, 
KVM_VGIC_V2_CPU_SIZE); + vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; @@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vctrl_base = vgic.vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v1.2.3 From cca28057dddfb2e8958ff0b8d008518ac4e7bb9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 4 Feb 2014 18:13:03 +0000 Subject: KVM: ARM: vgic: split GICv2 backend from the main vgic code Brutally hack the innocent vgic code, and move the GICv2 specific code to its own file, using vgic_ops and vgic_params as a way to pass information between the two blocks. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 8f186d522c69bb18dd9b93a634da4953228c67d4) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 248 +++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 267 +++++++++---------------------------------------- 2 files changed, 294 insertions(+), 221 deletions(-) create mode 100644 virt/kvm/arm/vgic-v2.c (limited to 'virt') diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c new file mode 100644 index 000000000000..940418ebd0d0 --- /dev/null +++ b/virt/kvm/arm/vgic-v2.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & GICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & GICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & GICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= GICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= GICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= GICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; +} + +static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); +} + +static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; +#endif + return val; +} + +static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) +{ + u64 val; + +#if BITS_PER_LONG == 64 + val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; + val <<= 32; + val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; +#else + val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; +#endif + return val; +} + +static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; + u32 ret = 0; + + if (misr & GICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & GICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; +} + +static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; +} + +static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + + vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; +} + +static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; + vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; +} + +static void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. 
Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +static const struct vgic_ops vgic_v2_ops = { + .get_lr = vgic_v2_get_lr, + .set_lr = vgic_v2_set_lr, + .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, + .get_elrsr = vgic_v2_get_elrsr, + .get_eisr = vgic_v2_get_eisr, + .get_interrupt_status = vgic_v2_get_interrupt_status, + .enable_underflow = vgic_v2_enable_underflow, + .disable_underflow = vgic_v2_disable_underflow, + .get_vmcr = vgic_v2_get_vmcr, + .set_vmcr = vgic_v2_set_vmcr, + .enable = vgic_v2_enable, +}; + +static struct vgic_params vgic_v2_params; + +/** + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv2 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv2 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v2_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v2_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain GICH resource\n"); + goto out; + } + + vgic->vctrl_base = of_iomap(vgic_node, 2); + if (!vgic->vctrl_base) { + kvm_err("Cannot ioremap GICH\n"); + ret = -ENOMEM; + goto out; + } + + vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); + vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic->vctrl_base, + vgic->vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain GICV resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic->maint_irq); + + *ops = &vgic_v2_ops; + *params = vgic; + goto out; + +out_unmap: + iounmap(vgic->vctrl_base); +out: + of_node_put(vgic_node); + return ret; +} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2a35cdbf343c..21d37f035a6f 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -static struct vgic_params vgic; +static const struct vgic_ops *vgic_ops; +static const struct vgic_params *vgic; static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) @@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm) } } -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - 
if (val & GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} - -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - if (!(lr_desc.state & LR_STATE_MASK)) - set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -#endif - return val; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -#endif - return val; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - - /* Get the show on the road... 
*/ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) { - return vgic_ops.get_lr(vcpu, lr); + return vgic_ops->get_lr(vcpu, lr); } static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.set_lr(vcpu, lr, vlr); + vgic_ops->set_lr(vcpu, lr, vlr); } static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { - vgic_ops.sync_lr_elrsr(vcpu, lr, vlr); + vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); } static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_elrsr(vcpu); + return vgic_ops->get_elrsr(vcpu); } static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) { - return vgic_ops.get_eisr(vcpu); + return vgic_ops->get_eisr(vcpu); } static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { - return vgic_ops.get_interrupt_status(vcpu); + return vgic_ops->get_interrupt_status(vcpu); } static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.enable_underflow(vcpu); + vgic_ops->enable_underflow(vcpu); } static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) { - vgic_ops.disable_underflow(vcpu); + vgic_ops->disable_underflow(vcpu); } static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.get_vmcr(vcpu, vmcr); + vgic_ops->get_vmcr(vcpu, vmcr); } static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) { - vgic_ops.set_vmcr(vcpu, vmcr); + vgic_ops->set_vmcr(vcpu, vmcr); } static inline void vgic_enable(struct kvm_vcpu *vcpu) { - vgic_ops.enable(vcpu); + vgic_ops->enable(vcpu); } static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) @@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) { + for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) /* Try to use another LR for this interrupt */ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic.nr_lr); - if (lr >= vgic.nr_lr) + vgic->nr_lr); + if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); @@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) unsigned long *eisr_ptr = (unsigned long *)&eisr; int lr; - for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); vgic_irq_clear_active(vcpu, vlr.irq); @@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) elrsr_ptr = (unsigned long *)&elrsr; /* Clear mappings for empty LRs */ - for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) { + for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr; if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) @@ -1443,8 +1300,8 @@ static void 
__kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ - pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr); - if (level_pending || pending < vgic.nr_lr) + pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); + if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); } @@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * all the way to the distributor structure to find out. Only * assembly code should use this one. */ - vgic_cpu->nr_lr = vgic.nr_lr; + vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); @@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) static void vgic_init_maintenance_interrupt(void *info) { - enable_percpu_irq(vgic.maint_irq, 0); + enable_percpu_irq(vgic->maint_irq, 0); } static int vgic_cpu_notify(struct notifier_block *self, @@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self, break; case CPU_DYING: case CPU_DYING_FROZEN: - disable_percpu_irq(vgic.maint_irq); + disable_percpu_irq(vgic->maint_irq); break; } @@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = { .notifier_call = vgic_cpu_notify, }; +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + {}, +}; + int kvm_vgic_hyp_init(void) { - int ret; - struct resource vctrl_res; - struct resource vcpu_res; + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; + int ret; - vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); if (!vgic_node) { - kvm_err("error: no compatible vgic node in DT\n"); + kvm_err("error: no compatible GIC node found\n"); return -ENODEV; } - vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0); - if (!vgic.maint_irq) { - kvm_err("error getting vgic maintenance irq from DT\n"); - ret = -ENXIO; - goto out; - } + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; - ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler, + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic.maint_irq); - goto out; + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; } ret = register_cpu_notifier(&vgic_cpu_nb); @@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); - if (ret) { - kvm_err("Cannot obtain VCTRL resource\n"); - goto out_free_irq; - } - - vgic.vctrl_base = of_iomap(vgic_node, 2); - if (!vgic.vctrl_base) { - kvm_err("Cannot ioremap VCTRL\n"); - ret = -ENOMEM; - goto out_free_irq; - } - - vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR); - vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic.vctrl_base, - vgic.vctrl_base + resource_size(&vctrl_res), - vctrl_res.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic.maint_irq); on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out_unmap; - } - vgic.vcpu_base = vcpu_res.start; - - goto out; + return 0; 
-out_unmap: - iounmap(vgic.vctrl_base); out_free_irq: - free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus()); -out: - of_node_put(vgic_node); + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } @@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; } @@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); - kvm->arch.vgic.vctrl_base = vgic.vctrl_base; + kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v1.2.3 From 49af11bddd902745cba6ffcd4d2606c1bbf88630 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 May 2014 10:03:25 +0100 Subject: KVM: ARM: vgic: revisit implementation of irqchip_in_kernel So far, irqchip_in_kernel() was implemented by testing the value of vctrl_base, which worked fine with GICv2. With GICv3, this field is useless, as we're using system registers instead of a memory-mapped interface. To solve this, add a boolean flag indicating whether we're using a vgic or not. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit f982cf4e9c37b19478c7bc6e0484a43a7e78cf57) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 21d37f035a6f..001597edde20 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm) } spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vctrl_base = vgic->vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; -- cgit v1.2.3 From 3cbb795f697d19552a23e04abc7c4be62c8b564f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2013 11:57:56 +0100 Subject: arm64: KVM: split GICv2 world switch from hyp code Move the GICv2 world switch code into its own file, and add the necessary indirection to the arm64 switch code. Also introduce a new type field to the vgic_params structure.
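
[Illustrative sketch, not part of the patch: a minimal model of the
indirection this series builds. The core keeps one ops pointer, selected at
probe time by the backend's type, and hot paths call through it without
testing the type. Names here are hypothetical; the real tables are
vgic_v2_ops and, later in this log, vgic_v3_ops.]

    #include <stdio.h>

    enum gic_type { GIC_V2, GIC_V3 };

    struct gic_ops {
            enum gic_type type;
            void (*enable)(void);
    };

    static void v2_enable(void) { puts("GICv2 enable"); }

    static const struct gic_ops v2_ops = { .type = GIC_V2, .enable = v2_enable };

    /* Set once by the probe path, then used everywhere. */
    static const struct gic_ops *ops;

    int main(void)
    {
            ops = &v2_ops;  /* stands in for a successful vgic_v2_probe() */
            ops->enable();
            return 0;
    }
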
Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 1a9b13056dde7e3092304d6041ccc60a913042ea) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 940418ebd0d0..d6c9c142f813 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node, kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, vgic->maint_irq); + vgic->type = VGIC_V2; *ops = &vgic_v2_ops; *params = vgic; goto out; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 001597edde20..2c34804d0d88 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void) on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + return 0; out_free_irq: -- cgit v1.2.3 From 611c5bb5dd4db9b002a864d380d149c2222d9616 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Jul 2013 15:15:23 +0100 Subject: KVM: ARM: vgic: add the GICv3 backend Introduce the support code for emulating a GICv2 on top of GICv3 hardware. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier (cherry picked from commit b2fb1c0d378399e1427a91bb991c094f2ca09a2f) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v3.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 virt/kvm/arm/vgic-v3.c (limited to 'virt') diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c new file mode 100644 index 000000000000..f01d44685720 --- /dev/null +++ b/virt/kvm/arm/vgic-v3.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2013 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. 
+ */ +#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + +static u32 ich_vtr_el2; + +static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) +{ + struct vgic_lr lr_desc; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + + lr_desc.irq = val & GICH_LR_VIRTUALID; + if (lr_desc.irq <= 15) + lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; + else + lr_desc.source = 0; + lr_desc.state = 0; + + if (val & ICH_LR_PENDING_BIT) + lr_desc.state |= LR_STATE_PENDING; + if (val & ICH_LR_ACTIVE_BIT) + lr_desc.state |= LR_STATE_ACTIVE; + if (val & ICH_LR_EOI) + lr_desc.state |= LR_EOI_INT; + + return lr_desc; +} + +static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | + lr_desc.irq); + + if (lr_desc.state & LR_STATE_PENDING) + lr_val |= ICH_LR_PENDING_BIT; + if (lr_desc.state & LR_STATE_ACTIVE) + lr_val |= ICH_LR_ACTIVE_BIT; + if (lr_desc.state & LR_EOI_INT) + lr_val |= ICH_LR_EOI; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; +} + +static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, + struct vgic_lr lr_desc) +{ + if (!(lr_desc.state & LR_STATE_MASK)) + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); +} + +static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; +} + +static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; +} + +static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) +{ + u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; + u32 ret = 0; + + if (misr & ICH_MISR_EOI) + ret |= INT_STATUS_EOI; + if (misr & ICH_MISR_U) + ret |= INT_STATUS_UNDERFLOW; + + return ret; +} + +static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + + vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; +} + +static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; +} + +static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; +} + +static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + u32 vmcr; + + vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; +} + +static void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; + + /* Get the show on the road... 
*/ + vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; +} + +static const struct vgic_ops vgic_v3_ops = { + .get_lr = vgic_v3_get_lr, + .set_lr = vgic_v3_set_lr, + .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, + .get_elrsr = vgic_v3_get_elrsr, + .get_eisr = vgic_v3_get_eisr, + .get_interrupt_status = vgic_v3_get_interrupt_status, + .enable_underflow = vgic_v3_enable_underflow, + .disable_underflow = vgic_v3_disable_underflow, + .get_vmcr = vgic_v3_get_vmcr, + .set_vmcr = vgic_v3_set_vmcr, + .enable = vgic_v3_enable, +}; + +static struct vgic_params vgic_v3_params; + +/** + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT + * @node: pointer to the DT node + * @ops: address of a pointer to the GICv3 operations + * @params: address of a pointer to HW-specific parameters + * + * Returns 0 if a GICv3 has been found, with the low level operations + * in *ops and the HW parameters in *params. Returns an error code + * otherwise. + */ +int vgic_v3_probe(struct device_node *vgic_node, + const struct vgic_ops **ops, + const struct vgic_params **params) +{ + int ret = 0; + u32 gicv_idx; + struct resource vcpu_res; + struct vgic_params *vgic = &vgic_v3_params; + + vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic->maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + + /* + * The ListRegs field is 5 bits, but there is a architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; + + if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) + gicv_idx = 1; + + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ + if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { + kvm_err("Cannot obtain GICV region\n"); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; + vgic->vctrl_base = NULL; + vgic->type = VGIC_V3; + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vcpu_res.start, vgic->maint_irq); + + *ops = &vgic_v3_ops; + *params = vgic; + +out: + of_node_put(vgic_node); + return ret; +} -- cgit v1.2.3 From 62c72507f3203889f14ab213260b8b1a693f34ef Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 9 Jul 2013 10:45:49 +0100 Subject: arm64: KVM: vgic: enable GICv2 emulation on top of GICv3 hardware Add the last missing bits that enable GICv2 emulation on top of GICv3 hardware. Signed-off-by: Marc Zyngier (cherry picked from commit 67b2abfedb7b861bead93400fa315c5c30879d51) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2c34804d0d88..c43716d81f47 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = { static const struct of_device_id vgic_ids[] = { { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; -- cgit v1.2.3 From ae1fc970eb80e9f8332948cf6b23f71f49cd5093 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:04 -0700 Subject: ARM: KVM: vgic mmio should hold data as LE bytes array in BE case According to recent clarifications of the mmio.data array's meaning, the mmio.data array should hold bytes as they would appear in memory. The vgic is a little-endian device, and in the case of a BE image the kernel side that emulates the vgic holds data in BE form.
So we need to byteswap the vgic registers cpu<->le32 when we read/write them from mmio.data[]. The change has no effect in the LE case, because the cpu already runs in le32. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier (cherry picked from commit 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c43716d81f47..c7abb26cd517 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { - return *((u32 *)mmio->data) & mask; + return le32_to_cpu(*((u32 *)mmio->data)) & mask; } static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) { - *((u32 *)mmio->data) = value & mask; + *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } /** -- cgit v1.2.3 From fad1f0113487190441d89ee8483a6b25d35eb153 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Thu, 12 Jun 2014 09:30:10 -0700 Subject: ARM64: KVM: fix vgic_bitmap_get_reg function for BE 64bit case Fix the vgic_bitmap_get_reg function to return the right word address of the 'unsigned long' bitmap value in the case of a BE 64-bit image. Signed-off-by: Victor Kamensky Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 9662fb4854e1319b4affda47f279c3f210316def) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c7abb26cd517..9bb20ef248d3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; +/* + * struct vgic_bitmap contains unions that provide two views of + * the same data. In one case it is an array of registers of + * u32's, and in the other case it is a bitmap of unsigned + * longs. + * + * This does not work on 64-bit BE systems, because the bitmap access + * will store two consecutive 32-bit words with the higher-addressed + * register's bits at the lower index and the lower-addressed register's + * bits at the higher index. + * + * Therefore, swizzle the register index when accessing the 32-bit word + * registers to access the right register's value. + */ +#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 +#define REG_OFFSET_SWIZZLE 1 +#else +#define REG_OFFSET_SWIZZLE 0 #endif + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg; + return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); else - return x->shared.reg + offset - 1; + return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, -- cgit v1.2.3 From 6bb7724ee43511575aba8505caba2b22ccbd0771 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 25 Jul 2014 06:27:03 -0700 Subject: kvm: Resolve missing-field-initializers warnings Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings.
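
[Illustrative sketch, not part of the patch: the warning and the fix in
miniature, assuming a gcc W=2 build. Dotted designators into a nested member
can leave gcc reporting omitted fields, while a fully elaborated nested
initializer, as the patch uses, does not.]

    struct pin { int irqchip; int line; };
    struct entry { int gsi; struct pin u; };

    /* old style: can trip -Wmissing-field-initializers at W=2 */
    #define ENTRY_OLD(n) { .gsi = (n), .u.irqchip = 0, .u.line = (n) }

    /* new style: same object, fully elaborated, no warning */
    #define ENTRY_NEW(n) { .gsi = (n), .u = { .irqchip = 0, .line = (n) } }

    static struct entry table[] = { ENTRY_NEW(0), ENTRY_NEW(1) };

    int main(void)
    {
            return table[1].u.line;	/* 1 */
    }
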
Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Paolo Bonzini (cherry picked from commit 25f97ff451a4aab534afc1290af97d23ea0b4fb3) Signed-off-by: Christoffer Dall --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a542a031..a228ee82bad2 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ out: #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else -- cgit v1.2.3 From 2f29ef83d9215f7373d5491fe33d6fadc2183d4a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:17:34 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf6cc062740d..6075902a0c73 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2568,7 +2568,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2592,7 +2592,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2611,7 +2611,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v1.2.3 From 0f1f16ef3bcd0e699c0763be638d1cb57797e5d5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:18:06 +0200 Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. 
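
[Illustrative sketch, not part of the patch: what the change enables from
userspace -- the same KVM_CHECK_EXTENSION query against either fd. Error
handling is omitted; a careful caller would first confirm
KVM_CAP_CHECK_EXTENSION_VM on the /dev/kvm fd, as the in-kernel table above
advertises it.]

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int vm = ioctl(kvm, KVM_CREATE_VM, 0);

            /* global answer, as before ... */
            int global = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
            /* ... and, with this patch, a possibly VM-specific answer */
            int per_vm = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);

            printf("global=%d per-vm=%d\n", global, per_vm);
            return 0;
    }
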
Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini (cherry picked from commit 92b591a4c46b103ebd3fc0d03a084e1efd331253) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 58 ++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6075902a0c73..9e99bfb10ba4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2321,6 +2321,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2484,6 +2512,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2568,33 +2599,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v1.2.3 From 624c250ccbd84584fd7eac5233f5c45cce4c40b9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:21:37 +0200 Subject: kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will map host physical region 0x2c020000 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c020000 in the above scenario. 
This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Joel Schopp Cc: Don Dutile Acked-by: Peter Maydell Acked-by: Joel Schopp Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 63afbe7a0ac184ef8485dac4914e87b211b5bfaa) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 16 ++++++++++++++++ virt/kvm/arm/vgic.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index d6c9c142f813..01124ef3690a 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -231,6 +231,22 @@ int vgic_v2_probe(struct device_node *vgic_node, ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic->vcpu_base = vcpu_res.start; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9bb20ef248d3..8823cccfe884 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1587,11 +1587,11 @@ int kvm_vgic_hyp_init(void) goto out_free_irq; } - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - /* Callback into for arch code for setup */ vgic_arch_setup(vgic); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + return 0; out_free_irq: -- cgit v1.2.3 From 5ed30eec2507d903c66f51d519cc1bca043042a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Jul 2014 11:42:18 +0100 Subject: KVM: arm64: GICv3: mandate page-aligned GICV region Just like GICv2 was fixed in 63afbe7a0ac1 (kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform), mandate the GICV region to be both aligned on a page boundary and its size to be a multiple of page size. This prevents a guest from being able to poke at regions where we have no idea what is sitting there. 
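
[Illustrative sketch, not part of either patch: the arithmetic behind both
alignment checks, using the GICV address from the Juno report above. The
kernel's PAGE_ALIGNED() reduces to the same power-of-two mask test.]

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool page_aligned(uint64_t x, uint64_t pgsz)
    {
            return (x & (pgsz - 1)) == 0;   /* pgsz must be a power of two */
    }

    int main(void)
    {
            uint64_t gicv = 0x2c02f000ULL, pgsz = 64 * 1024;
            uint64_t base = gicv & ~(pgsz - 1);

            /* A stage-2 mapping of the containing 64K page spans
             * 0x2c020000-0x2c02ffff, most of which is not GICV. */
            printf("page %#llx-%#llx, GICV at %#llx: aligned=%d\n",
                   (unsigned long long)base,
                   (unsigned long long)(base + pgsz - 1),
                   (unsigned long long)gicv,
                   page_aligned(gicv, pgsz));
            return 0;
    }
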
Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit fb3ec67942e92e5713e05b7691b277d0a0c0575d) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v3.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index f01d44685720..1c2c8eef0599 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node, ret = -ENXIO; goto out; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out; + } + vgic->vcpu_base = vcpu_res.start; vgic->vctrl_base = NULL; vgic->type = VGIC_V3; -- cgit v1.2.3 From 84064185d8183dafa6f6c174fad4b22f9cb6e9bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:09 +1000 Subject: KVM: Don't keep reference to irq routing table in irqfd struct This makes the irqfd code keep a copy of the irq routing table entry for each irqfd, rather than a reference to the copy in the actual irq routing table maintained in kvm/virt/irqchip.c. This will enable us to change the routing table structure in future, or even not have a routing table at all on some platforms. The synchronization that was previously achieved using srcu_dereference on the read side is now achieved using a seqcount_t structure. That ensures that we don't get a halfway-updated copy of the structure if we read it while another thread is updating it. We still use srcu_read_lock/unlock around the read side so that when changing the routing table we can be sure that after calling synchronize_srcu, nothing will be using the old routing. 
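
[Illustrative sketch, not part of the patch: the seqcount read/retry protocol
the irqfd code switches to, modelled for a single writer in plain C. The
kernel's seqcount_t additionally provides the memory barriers this toy omits,
and SRCU still brackets the read side.]

    struct route { int type; int gsi; };

    static unsigned int seq;
    static struct route entry;

    static void writer_update(struct route next)
    {
            ++seq;                  /* odd: update in progress */
            entry = next;
            ++seq;                  /* even: update complete */
    }

    static struct route reader_snapshot(void)
    {
            struct route copy;
            unsigned int s;

            do {
                    while ((s = seq) & 1)
                            ;       /* writer active, wait */
                    copy = entry;
            } while (seq != s);     /* raced with a writer: retry */

            return copy;
    }

    int main(void)
    {
            writer_update((struct route){ .type = 1, .gsi = 42 });
            return reader_snapshot().gsi == 42 ? 0 : 1;
    }
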
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 56f89f3629ffd1a21d38c3d0bea23deac0e284ce) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 41d06db53c9c..656292cebccd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "iodev.h" @@ -75,7 +76,8 @@ struct _irqfd { struct kvm *kvm; wait_queue_t wait; /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; @@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; int idx; if (flags & POLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); - irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + if (irq.type == KVM_IRQ_ROUTING_MSI) + kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); @@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, { struct kvm_kernel_irq_routing_entry *e; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + irqfd->irq_entry.type = 0; + if (irqfd->gsi >= irq_rt->nr_rt_entries) + goto out; hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. */ if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); + irqfd->irq_entry = *e; } + + out: + write_seqcount_end(&irqfd->irq_entry_sc); } static int @@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); f = fdget(args->fd); if (!f.file) { @@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_srcu done by caller - * of that function. 
*/ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } -- cgit v1.2.3 From 21945799f9ebce289a2600188f1baabc9d1435b5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:10 +1000 Subject: KVM: irqchip: Provide and use accessors for irq routing table This provides accessor functions for the KVM interrupt mappings, in order to reduce the amount of code that accesses the fields of the kvm_irq_routing_table struct, and restrict that code to one file, virt/kvm/irqchip.c. The new functions are kvm_irq_map_gsi(), which maps from a global interrupt number to a set of IRQ routing entries, and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to a global interrupt number. This also moves the update of kvm_irq_routing_table::chip[][] into irqchip.c, out of the various kvm_set_routing_entry implementations. That means that none of the kvm_set_routing_entry implementations need the kvm_irq_routing_table argument anymore, so this removes it. This does not change any locking or data lifetime rules. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 8ba918d488caded2c4368b0b922eb905fe3bb101) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 10 ++++++---- virt/kvm/irq_comm.c | 20 +++++++++----------- virt/kvm/irqchip.c | 42 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 49 insertions(+), 23 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 656292cebccd..01698e36e245 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, struct kvm_irq_routing_table *irq_rt) { struct kvm_kernel_irq_routing_entry *e; + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + int i, n_entries; + + n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); irqfd->irq_entry.type = 0; - if (irqfd->gsi >= irq_rt->nr_rt_entries) - goto out; - hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { + e = entries; + for (i = 0; i < n_entries; ++i, ++e) { /* Only fast-path MSI. 
*/ if (e->type == KVM_IRQ_ROUTING_MSI) irqfd->irq_entry = *e; } - out: write_seqcount_end(&irqfd->irq_entry_sc); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a228ee82bad2..175844593243 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) { + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; struct kvm_irq_routing_table *irq_rt; @@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) { - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - break; - } + if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; } @@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; @@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, e->irqchip.pin = ue->u.irqchip.pin + delta; if (e->irqchip.pin >= max_pin) goto out; - rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index b43c275775cd..f4648dd94888 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,13 +31,37 @@ #include #include "irq.h" +int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, + struct kvm_irq_routing_table *irq_rt, int gsi) +{ + struct kvm_kernel_irq_routing_entry *e; + int n = 0; + + if (gsi < irq_rt->nr_rt_entries) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + entries[n] = *e; + ++n; + } + } + + return n; +} + +int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, + unsigned irqchip, unsigned pin) +{ + return irq_rt->chip[irqchip][pin]; +} + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); if (gsi != -1) 
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0, idx; + struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i, idx; struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, */ idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; + i = kvm_irq_map_gsi(irq_set, irq_rt, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, e->gsi = ue->gsi; e->type = ue->type; - r = kvm_set_routing_entry(rt, e, ue); + r = kvm_set_routing_entry(e, ue); if (r) goto out; + if (e->type == KVM_IRQ_ROUTING_IRQCHIP) + rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; -- cgit v1.2.3 From 30ab56f1557ad921aafffd4c8f5c611397cd6cdd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:11 +1000 Subject: KVM: Move all accesses to kvm::irq_routing into irqchip.c Now that struct _irqfd does not keep a reference to storage pointed to by the irq_routing field of struct kvm, we can move the statement that updates it out from under the irqfds.lock and put it in kvm_set_irq_routing() instead. That means we then have to take a srcu_read_lock on kvm->irq_srcu around the irqfd_update call in kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer ensures that the routing can't change. Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin() to take a struct kvm * argument instead of the pointer to the routing table, this allows us to move all references to kvm->irq_routing into irqchip.c. That in turn allows us to move the definition of the kvm_irq_routing_table struct into irqchip.c as well.
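A rough sketch of the consumer pattern after this change (illustrative only; gsi stands for whatever interrupt number the caller holds):

        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
        int idx, i, n;

        idx = srcu_read_lock(&kvm->irq_srcu);
        n = kvm_irq_map_gsi(kvm, entries, gsi);
        srcu_read_unlock(&kvm->irq_srcu, idx);

        for (i = 0; i < n; i++) {
                /* entries[i] is a private copy, e.g. dispatch on entries[i].type */
        }

Since kvm_irq_map_gsi() copies the entries out, the SRCU read-side section only needs to cover the copy itself; callers then work on their private copies.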
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 22 +++++++++------------- virt/kvm/irq_comm.c | 6 ++---- virt/kvm/irqchip.c | 39 +++++++++++++++++++++++++-------------- 3 files changed, 36 insertions(+), 31 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 01698e36e245..cacd30ec316e 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, - struct kvm_irq_routing_table *irq_rt) +static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; int i, n_entries; - n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi); + n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); @@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; unsigned int events; + int idx; irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) @@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) goto fail; } - irq_rt = rcu_dereference_protected(kvm->irq_routing, - lockdep_is_held(&kvm->irqfds.lock)); - irqfd_update(kvm, irqfd, irq_rt); + idx = srcu_read_lock(&kvm->irq_srcu); + irqfd_update(kvm, irqfd); + srcu_read_unlock(&kvm->irq_srcu, idx); events = f.file->f_op->poll(f.file, &irqfd->pt); @@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm) } /* - * Change irq_routing and irqfd. + * Take note of a change in irq routing. * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. */ -void kvm_irq_routing_update(struct kvm *kvm, - struct kvm_irq_routing_table *irq_rt) +void kvm_irq_routing_update(struct kvm *kvm) { struct _irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); - rcu_assign_pointer(kvm->irq_routing, irq_rt); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) - irqfd_update(kvm, irqfd, irq_rt); + irqfd_update(kvm, irqfd); spin_unlock_irq(&kvm->irqfds.lock); } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 175844593243..963b8995a9e8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; struct kvm_kernel_irq_routing_entry *e; int ret = -EINVAL; - struct kvm_irq_routing_table *irq_rt; int idx; trace_kvm_set_irq(irq, level, irq_source_id); @@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) * which is limited to 1:1 GSI mapping. 
*/ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) { + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; if (likely(e->type == KVM_IRQ_ROUTING_MSI)) ret = kvm_set_msi_inatomic(e, kvm); @@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, int idx, gsi; idx = srcu_read_lock(&kvm->irq_srcu); - gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link) if (kimn->irq == gsi) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index f4648dd94888..04faac50cef5 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,12 +31,26 @@ #include #include "irq.h" -int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, - struct kvm_irq_routing_table *irq_rt, int gsi) +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, int gsi) { + struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; int n = 0; + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); if (gsi < irq_rt->nr_rt_entries) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { entries[n] = *e; @@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries, return n; } -int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt, - unsigned irqchip, unsigned pin) +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_irq_routing_table *irq_rt; + + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); return irq_rt->chip[irqchip][pin]; } bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_irq_routing_table *irq_rt; struct kvm_irq_ack_notifier *kian; int gsi, idx; trace_kvm_ack_irq(irqchip, pin); idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, link) @@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS]; int ret = -1, i, idx; - struct kvm_irq_routing_table *irq_rt; trace_kvm_set_irq(irq, level, irq_source_id); @@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, * writes to the unused one. 
*/ idx = srcu_read_lock(&kvm->irq_srcu); - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - i = kvm_irq_map_gsi(irq_set, irq_rt, irq); + i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); while(i--) { @@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); + rcu_assign_pointer(kvm->irq_routing, new); + kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); synchronize_srcu_expedited(&kvm->irq_srcu); -- cgit v1.2.3 From af76da6591d9716f687dce16be2948442323c133 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:12 +1000 Subject: KVM: Move irq notifier implementation into eventfd.c This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(), kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier() from irqchip.c to eventfd.c. The reason for doing this is that those functions are used in connection with IRQFDs, which are implemented in eventfd.c. In future we will want to use IRQFDs on platforms that don't implement the GSI routing implemented in irqchip.c, so we won't be compiling in irqchip.c, but we still need the irq notifiers. The implementation is unchanged. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/irqchip.c | 61 ---------------------------------------------------- 2 files changed, 63 insertions(+), 61 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index cacd30ec316e..6e0a4e6794f4 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -34,7 +34,9 @@ #include #include #include +#include +#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING @@ -832,3 +834,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + 
synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 04faac50cef5..7f256f31df10 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) return irq_rt->chip[irqchip][pin]; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; -- cgit v1.2.3 From 0c226cbe862216a44597556ab2ada3b69566ec64 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:22:54 +0200 Subject: KVM: Give IRQFD its own separate enabling Kconfig option Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING. So that we can have the IRQFD code compiled in without having the IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING, and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING also select HAVE_KVM_IRQFD. 
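Illustratively, an architecture that wants irqfds would then do something like the following in its Kconfig (a hypothetical fragment, not part of this patch):

        config KVM
                bool "Kernel-based Virtual Machine (KVM) support"
                select HAVE_KVM_IRQCHIP
                select HAVE_KVM_IRQFD
                # HAVE_KVM_IRQ_ROUTING is now only needed for full GSI routing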
Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini (cherry picked from commit 297e21053a52f060944e9f0de4c64fad9bcd72fc) Signed-off-by: Christoffer Dall --- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 13f2d19793e3..fc0c5e603eb4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQFD + bool + config HAVE_KVM_IRQ_ROUTING bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 6e0a4e6794f4..d36ada5e16bb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -39,7 +39,7 @@ #include "irq.h" #include "iodev.h" -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -450,7 +450,7 @@ out: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9e99bfb10ba4..da1806834645 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2334,7 +2334,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: -- cgit v1.2.3 From 194fc565e2807c808cafe9ac4d6ef4d11b5befc0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 6 Aug 2014 14:24:45 +0200 Subject: KVM: Move more code under CONFIG_HAVE_KVM_IRQFD Commit e4d57e1ee1ab (KVM: Move irq notifier implementation into eventfd.c, 2014-06-30) included the irq notifier code unconditionally in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before. Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_KVM_IRQ_ROUTING to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be under CONFIG_HAVE_KVM_IRQCHIP. Together, this broke compilation without CONFIG_KVM_XICS. Fix by adding or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD.
Signed-off-by: Paolo Bonzini (cherry picked from commit c77dcacb397519b6ade8f08201a4a90a7f4f751e) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 122 ++++++++++++++++++++++++++-------------------------- virt/kvm/kvm_main.c | 2 + 2 files changed, 63 insertions(+), 61 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index d36ada5e16bb..25b34d7a8114 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -445,6 +445,67 @@ out: kfree(irqfd); return ret; } + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi, idx; + + trace_kvm_ack_irq(irqchip, pin); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} #endif void @@ -834,64 +895,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return kvm_assign_ioeventfd(kvm, args); } - -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - srcu_read_unlock(&kvm->irq_srcu, idx); - return true; - } - - srcu_read_unlock(&kvm->irq_srcu, idx); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi, idx; - - trace_kvm_ack_irq(irqchip, pin); - - idx = srcu_read_lock(&kvm->irq_srcu); - gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - srcu_read_unlock(&kvm->irq_srcu, idx); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - 
hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); -#ifdef __KVM_HAVE_IOAPIC - kvm_vcpu_request_scan_ioapic(kvm); -#endif -} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index da1806834645..00872ddf8664 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -465,6 +465,8 @@ static struct kvm *kvm_create_vm(unsigned long type) #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); +#endif +#ifdef CONFIG_HAVE_KVM_IRQFD INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif -- cgit v1.2.3 From 13dd312e32c02c90572eb3b3151c247d5eedf89a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 19 Aug 2014 16:45:56 +0200 Subject: KVM: avoid unnecessary synchronize_rcu We don't have to wait for a grace period if there is no oldpid that we are going to free. put_pid() also checks for NULL, so this patch only needs to gate the synchronize_rcu() call. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 7103f60de8bed21a0ad5d15d2ad5b7a333dda201) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 00872ddf8664..b83ea5c1ef83 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu) struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); - synchronize_rcu(); + if (oldpid) + synchronize_rcu(); put_pid(oldpid); } cpu = get_cpu(); -- cgit v1.2.3 From 2095b40eedde9892c20a75e8ca64dbfb0449cbe3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:23:15 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. The advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit e790d9ef6405633b007339d746b709aed43a928d) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b83ea5c1ef83..26a6de44079b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3120,6 +3120,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } -- cgit v1.2.3 From 83e21bca5ae9d0fb72143339af66af4cb6aba0b4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we need to resolve a gfn to an hva given a pointer to a memslot, both to avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(). Add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function.
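The intended call pattern can be sketched roughly like this (illustrative only, not taken from the arm fault path itself):

        struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
        unsigned long hva;
        bool writable;

        hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
        if (kvm_is_error_hva(hva))
                return -EFAULT;
        /* writable now reflects KVM_MEM_READONLY, with a single memslot lookup */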
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 26a6de44079b..20b6ec87dfaa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1078,9 +1078,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1089,6 +1089,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); -- cgit v1.2.3 From 750453a7c2a7222b47fa797eb2fdb4d99e3eddce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:24 +0100 Subject: KVM: vgic: return int instead of bool when checking I/O ranges vgic_ioaddr_overlap claims to return a bool, but in reality it returns an int. Shut sparse up by fixing the type signature. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit 1fa451bcc67fa921a04c5fac8dbcde7844d54512) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8823cccfe884..3e81bc49c0b3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1690,7 +1690,7 @@ out: return ret; } -static bool vgic_ioaddr_overlap(struct kvm *kvm) +static int vgic_ioaddr_overlap(struct kvm *kvm) { phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; -- cgit v1.2.3 From 414d49da2d0c9a808305ed91c5d02799506cffb8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:25 +0100 Subject: KVM: vgic: declare probe function pointer as const We extract the vgic probe function from the of_device_id data pointer, which is const. Kill the sparse warning by ensuring that the local function pointer is also marked as const. 
Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall (cherry picked from commit de56fb1923ca11f428bf557870e0faa99f38762e) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3e81bc49c0b3..a4f722a2a968 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = { int kvm_vgic_hyp_init(void) { const struct of_device_id *matched_id; - int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); + const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); struct device_node *vgic_node; int ret; -- cgit v1.2.3 From 2e65f2aa7d8f322170bbb698b454eeac34a5c5ad Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 26 Aug 2014 14:00:37 +0200 Subject: KVM: Unconditionally export KVM_CAP_READONLY_MEM The idea behind capabilities and the KVM_CHECK_EXTENSION ioctl is that userspace can, at run-time, determine if a feature is supported or not. This allows KVM to begin supporting a new feature with a new kernel version without any need to update user space. Unfortunately, since the definition of KVM_CAP_READONLY_MEM was guarded by #ifdef __KVM_HAVE_READONLY_MEM, such discovery still required a user space update. Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini (cherry picked from commit 0f8a4de3e088797576ac76200b634b802e5c7781) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 20b6ec87dfaa..7381ec630282 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -710,7 +710,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; -#ifdef KVM_CAP_READONLY_MEM +#ifdef __KVM_HAVE_READONLY_MEM valid_flags |= KVM_MEM_READONLY; #endif -- cgit v1.2.3 From 47a53256d4ac266089f050d5dce2cd53249d1e74 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:29:59 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beginning was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this.
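For context, the unused argument came from the on_each_cpu() callback type, which always receives a void *info cookie; a minimal sketch of the call shape (mirroring the pattern in kvm_main.c, slightly simplified):

        static void hardware_enable_nolock(void *junk)
        {
                /* junk exists only to satisfy the callback signature */
                kvm_arch_hardware_enable();     /* arch hook needs no argument */
        }

        /* run on every CPU, passing NULL as the unused info cookie */
        on_each_cpu(hardware_enable_nolock, NULL, 1);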
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini (cherry picked from commit 13a34e067eab24fec882e1834fbf2cc31911d474) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7381ec630282..4832924e4c8c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2673,7 +2673,7 @@ static void hardware_enable_nolock(void *junk) cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2698,7 +2698,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) -- cgit v1.2.3 From 6adc694a20385452b2d24e003e64df2c64656253 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Aug 2014 14:29:21 +0200 Subject: KVM: do not bias the generation number in kvm_current_mmio_generation The next patch will give a meaning (a la seqcount) to the low bit of the generation number. Ensure that it matches between kvm->memslots->generation and kvm_current_mmio_generation(). Cc: stable@vger.kernel.org Reviewed-by: David Matlack Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini (cherry picked from commit 00f034a12fdd81210d58116326d92780aac5c238) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4832924e4c8c..9768712659d6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -477,6 +477,13 @@ static struct kvm *kvm_create_vm(unsigned long type) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err_no_srcu; + + /* + * Init kvm generation close to the maximum to easily test the + * code of handling generation number wrap-around. + */ + kvm->memslots->generation = -150; + kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; -- cgit v1.2.3 From e79761721e82a6ab499ccf7a0a6cb5e6cf858ab9 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 18 Aug 2014 15:46:06 -0700 Subject: kvm: fix potentially corrupt mmio cache vcpu exits and memslot mutations can run concurrently as long as the vcpu does not acquire the slots mutex. Thus it is theoretically possible for memslots to change underneath a vcpu that is handling an exit. If we increment the memslot generation number again after synchronize_srcu_expedited(), vcpus can safely cache memslot generation without maintaining a single rcu_dereference through an entire vm exit. And much of the x86/kvm code does not maintain a single rcu_dereference of the current memslots during each exit. We can prevent the following case:

   vcpu (CPU 0)                             | thread (CPU 1)
--------------------------------------------+--------------------------
1  vm exit                                  |
2  srcu_read_unlock(&kvm->srcu)             |
3  decide to cache something based on       |
     old memslots                           |
4                                           | change memslots
                                            | (increments generation)
5                                           | synchronize_srcu(&kvm->srcu);
6  retrieve generation # from new memslots  |
7  tag cache with new memslot generation    |
8  srcu_read_unlock(&kvm->srcu)             |
   ...                                      |
   <action based on cache occurs even       |
    though the caching decision was based   |
    on the old memslots>                    |
   ...                                      |
   <action *continues* to occur until next  |
    memslot generation change, which may    |
    be never>                               |

By incrementing the generation after synchronizing with kvm->srcu readers, we ensure that the generation retrieved in (6) will become invalid soon after (8).
Keeping the existing increment is not strictly necessary, but we do keep it and just move it for consistency from update_memslots to install_new_memslots. It invalidates old cached MMIOs immediately, instead of having to wait for the end of synchronize_srcu_expedited, which makes the code more clearly correct in case CPU 1 is preempted right after synchronize_srcu() returns. To avoid halving the generation space in SPTEs, always presume that the low bit of the generation is zero when reconstructing a generation number out of an SPTE. This effectively disables MMIO caching in SPTEs during the call to synchronize_srcu_expedited. Using the low bit this way is somewhat like a seqcount---where the protected thing is a cache, and instead of retrying we can simply punt if we observe the low bit to be 1. Cc: stable@vger.kernel.org Signed-off-by: David Matlack Reviewed-by: Xiao Guangrong Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini (cherry picked from commit ee3d1570b58677885b4552bce8217fda7b226a68) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9768712659d6..4cf3757c5ebb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -95,8 +95,6 @@ static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, u64 last_generation); static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, @@ -697,8 +695,7 @@ static void sort_memslots(struct kvm_memslots *slots) } static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new, - u64 last_generation) + struct kvm_memory_slot *new) { if (new) { int id = new->id; @@ -709,8 +706,6 @@ static void update_memslots(struct kvm_memslots *slots, if (new->npages != npages) sort_memslots(slots); } - - slots->generation = last_generation + 1; } static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) @@ -732,10 +727,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, { struct kvm_memslots *old_memslots = kvm->memslots; - update_memslots(slots, new, kvm->memslots->generation); + /* + * Set the low bit in the generation, which disables SPTE caching + * until the end of synchronize_srcu_expedited. + */ + WARN_ON(old_memslots->generation & 1); + slots->generation = old_memslots->generation + 1; + + update_memslots(slots, new); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); + /* + * Increment the new memslot generation a second time. This prevents + * vm exits that race with memslot updates from caching a memslot + * generation that will (potentially) be valid forever. + */ + slots->generation++; + kvm_arch_memslots_updated(kvm); return old_memslots; -- cgit v1.2.3 From b400850cc1aa44b67b6fe4afe356bc32dc0f60bf Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:31 +0200 Subject: KVM: remove redundant check of in_spin_loop The expression `vcpu->spin_loop.in_spin_loop' is always true, because it is evaluated only when the condition `!vcpu->spin_loop.in_spin_loop' is false. 
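The simplification rests on the identity !a || (a && b) == !a || b; spelled out (illustrative only):

        /* before: */ eligible = !in_spin_loop || (in_spin_loop && dy_eligible);
        /* after:  */ eligible = !in_spin_loop || dy_eligible;

When in_spin_loop is false both forms evaluate to true; when it is true both reduce to dy_eligible.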
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit 34656113182b704682e23d1363417536addfec97) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4cf3757c5ebb..38f82caa2170 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1794,8 +1794,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || - (vcpu->spin_loop.in_spin_loop && - vcpu->spin_loop.dy_eligible); + vcpu->spin_loop.dy_eligible; if (vcpu->spin_loop.in_spin_loop) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); -- cgit v1.2.3 From 18cf135259766d2878ecb870a057cc5cc20dbe81 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:32 +0200 Subject: KVM: remove redundant assignment of return value in kvm_dev_ioctl The first statement of kvm_dev_ioctl is "long r = -EINVAL;". No need to reassign the same value. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit a13f533b2f1d53a7c0baa7490498caeab7bc8ba5) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38f82caa2170..4ef039bff4ee 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2631,7 +2631,6 @@ static long kvm_dev_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_API_VERSION: - r = -EINVAL; if (arg) goto out; r = KVM_API_VERSION; @@ -2643,7 +2642,6 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case KVM_GET_VCPU_MMAP_SIZE: - r = -EINVAL; if (arg) goto out; r = PAGE_SIZE; /* struct kvm_run */ -- cgit v1.2.3 From a4f61ba1ac80702287a93d94b70da8dda247be7b Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 4 Sep 2014 21:13:33 +0200 Subject: KVM: remove redundant assignments in __kvm_set_memory_region __kvm_set_memory_region sets r to -EINVAL very early. Doing it again is not necessary. The same is true later on, where r is assigned -ENOMEM twice. Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini (cherry picked from commit f2a25160887e00434ce1361007009120e1fecbda) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4ef039bff4ee..632f588d5b41 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -795,7 +795,6 @@ int __kvm_set_memory_region(struct kvm *kvm, base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; - r = -EINVAL; if (npages > KVM_MEM_MAX_NR_PAGES) goto out; @@ -809,7 +808,6 @@ int __kvm_set_memory_region(struct kvm *kvm, new.npages = npages; new.flags = mem->flags; - r = -EINVAL; if (npages) { if (!old.npages) change = KVM_MR_CREATE; @@ -865,7 +863,6 @@ int __kvm_set_memory_region(struct kvm *kvm, } if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - r = -ENOMEM; slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) -- cgit v1.2.3 From e8b68ced4b08063fdba8f60a88d5a03f500ef45c Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Mon, 1 Sep 2014 09:36:08 +0100 Subject: KVM: EVENTFD: remove inclusion of irq.h It is no longer needed; irq.h would be empty on ARM.
Acked-by: Paolo Bonzini Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier (cherry picked from commit 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec) Signed-off-by: Christoffer Dall --- virt/kvm/eventfd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 25b34d7a8114..0ab1411eb007 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,6 @@ #include #include -#include "irq.h" #include "iodev.h" #ifdef CONFIG_HAVE_KVM_IRQFD -- cgit v1.2.3 From 8034a7993f894ac203ab60339afd495e4d462adb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:32:32 +0200 Subject: KVM: device: add simple registration mechanism for kvm_device_ops kvm_ioctl_create_device currently has knowledge of all the device types and their associated ops. This is fairly inflexible when adding support for new in-kernel device emulations, so move what we currently have out into a table, which can support dynamic registration of ops by new drivers for virtual hardware. Cc: Alex Williamson Cc: Alex Graf Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Marc Zyngier Acked-by: Cornelia Huck Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit d60eacb07053142bfb9b41582074a89a790a9d46) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 64 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 24 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 632f588d5b41..7d84193952d7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2281,39 +2281,55 @@ struct kvm_device *kvm_device_from_filp(struct file *filp) return filp->private_data; } -static int kvm_ioctl_create_device(struct kvm *kvm, - struct kvm_create_device *cd) -{ - struct kvm_device_ops *ops = NULL; - struct kvm_device *dev; - bool test = cd->flags & KVM_CREATE_DEVICE_TEST; - int ret; - - switch (cd->type) { +static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { #ifdef CONFIG_KVM_MPIC - case KVM_DEV_TYPE_FSL_MPIC_20: - case KVM_DEV_TYPE_FSL_MPIC_42: - ops = &kvm_mpic_ops; - break; + [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, + [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, #endif + #ifdef CONFIG_KVM_XICS - case KVM_DEV_TYPE_XICS: - ops = &kvm_xics_ops; - break; + [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, #endif + #ifdef CONFIG_KVM_VFIO - case KVM_DEV_TYPE_VFIO: - ops = &kvm_vfio_ops; - break; + [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, #endif + #ifdef CONFIG_KVM_ARM_VGIC - case KVM_DEV_TYPE_ARM_VGIC_V2: - ops = &kvm_arm_vgic_v2_ops; - break; + [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, #endif - default: + +#ifdef CONFIG_S390 + [KVM_DEV_TYPE_FLIC] = &kvm_flic_ops, +#endif +}; + +int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) +{ + if (type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENOSPC; + + if (kvm_device_ops_table[type] != NULL) + return -EEXIST; + + kvm_device_ops_table[type] = ops; + return 0; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) + return -ENODEV; + + ops = kvm_device_ops_table[cd->type]; + if (ops == NULL) return -ENODEV; - } if (test) return 0; -- cgit v1.2.3 From 1481a6e49e4fbdbd66faeab4d4dee77a53bcb2ba Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 
22:34:14 +0200 Subject: KVM: ARM: vgic: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the ARM VGIC, instead of relying on the static table. [ Adapted for LSK to use register_cpu_notifier() like in the code block that was moved instead of __register_cpu_notifier(), because the series to fix double lock on cpu_add_remove_lock that defines __register_cpu_notifier() is not in LSK. - Christoffer ] Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit c06a841bf36340e9e917ce60d11a6425ac85d0bd) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 157 ++++++++++++++++++++++++++-------------------------- virt/kvm/kvm_main.c | 4 -- 2 files changed, 79 insertions(+), 82 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a4f722a2a968..bcfa9a32753b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic->maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic->maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, - {}, -}; - -int kvm_vgic_hyp_init(void) -{ - const struct of_device_id *matched_id; - const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, - const struct vgic_params **); - struct device_node *vgic_node; - int ret; - - vgic_node = of_find_matching_node_and_match(NULL, - vgic_ids, &matched_id); - if (!vgic_node) { - kvm_err("error: no compatible GIC node found\n"); - return -ENODEV; - } - - vgic_probe = matched_id->data; - ret = vgic_probe(vgic_node, &vgic_ops, &vgic); - if (ret) - return ret; - - ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); - return ret; - } - - ret = register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - /* Callback into for arch code for setup */ - vgic_arch_setup(vgic); - - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - return 0; - -out_free_irq: - free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); - return ret; -} - /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) return kvm_vgic_create(dev->kvm); } -struct kvm_device_ops kvm_arm_vgic_v2_ops = { +static struct kvm_device_ops kvm_arm_vgic_v2_ops = { .name = "kvm-arm-vgic", .create = vgic_create, .destroy = vgic_destroy, @@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { .get_attr = vgic_get_attr, .has_attr = vgic_has_attr, }; + +static void vgic_init_maintenance_interrupt(void *info) +{ + enable_percpu_irq(vgic->maint_irq, 0); +} + +static 
int vgic_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + vgic_init_maintenance_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(vgic->maint_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vgic_cpu_nb = { + .notifier_call = vgic_cpu_notify, +}; + +static const struct of_device_id vgic_ids[] = { + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + {}, +}; + +int kvm_vgic_hyp_init(void) +{ + const struct of_device_id *matched_id; + int (*vgic_probe)(struct device_node *,const struct vgic_ops **, + const struct vgic_params **); + struct device_node *vgic_node; + int ret; + + vgic_node = of_find_matching_node_and_match(NULL, + vgic_ids, &matched_id); + if (!vgic_node) { + kvm_err("error: no compatible GIC node found\n"); + return -ENODEV; + } + + vgic_probe = matched_id->data; + ret = vgic_probe(vgic_node, &vgic_ops, &vgic); + if (ret) + return ret; + + ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); + return ret; + } + + ret = register_cpu_notifier(&vgic_cpu_nb); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + /* Callback into for arch code for setup */ + vgic_arch_setup(vgic); + + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + + return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + +out_free_irq: + free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); + return ret; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7d84193952d7..51e7fa766a35 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2295,10 +2295,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, #endif -#ifdef CONFIG_KVM_ARM_VGIC - [KVM_DEV_TYPE_ARM_VGIC_V2] = &kvm_arm_vgic_v2_ops, -#endif - #ifdef CONFIG_S390 [KVM_DEV_TYPE_FLIC] = &kvm_flic_ops, #endif -- cgit v1.2.3 From c3a7643fc4e4931985390ba85f820132a829cece Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 2 Oct 2014 22:35:53 +0200 Subject: KVM: VFIO: register kvm_device_ops dynamically Now that we have a dynamic means to register kvm_device_ops, use that for the VFIO kvm device, instead of relying on the static table. This is achieved by a module_init call to register the ops with KVM. 
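A driver would then plug in along these lines (a hypothetical sketch; the kvm-my-dev name, the KVM_DEV_TYPE_MY_DEV constant and the create/destroy callbacks are placeholders, not real KVM symbols):

        static struct kvm_device_ops my_dev_ops = {
                .name    = "kvm-my-dev",
                .create  = my_dev_create,       /* assumed driver callbacks */
                .destroy = my_dev_destroy,
        };

        static int __init my_dev_ops_init(void)
        {
                return kvm_register_device_ops(&my_dev_ops, KVM_DEV_TYPE_MY_DEV);
        }
        module_init(my_dev_ops_init);

The vfio.c hunk below follows exactly this shape for the real kvm-vfio device.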
Cc: Gleb Natapov Cc: Paolo Bonzini Acked-by: Alex Williamson Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini (cherry picked from commit 80ce1639727e9d38729c34f162378508c307ca25) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 4 ---- virt/kvm/vfio.c | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 51e7fa766a35..39fad76e973f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2291,10 +2291,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, #endif -#ifdef CONFIG_KVM_VFIO - [KVM_DEV_TYPE_VFIO] = &kvm_vfio_ops, -#endif - #ifdef CONFIG_S390 [KVM_DEV_TYPE_FLIC] = &kvm_flic_ops, #endif diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index b4f9507ae650..4c8a2e17ed81 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -233,6 +233,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ } +static int kvm_vfio_create(struct kvm_device *dev, u32 type); + +static struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; + static int kvm_vfio_create(struct kvm_device *dev, u32 type) { struct kvm_device *tmp; @@ -255,10 +265,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) return 0; } -struct kvm_device_ops kvm_vfio_ops = { - .name = "kvm-vfio", - .create = kvm_vfio_create, - .destroy = kvm_vfio_destroy, - .set_attr = kvm_vfio_set_attr, - .has_attr = kvm_vfio_has_attr, -}; +static int __init kvm_vfio_ops_init(void) +{ + return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); +} +module_init(kvm_vfio_ops_init); -- cgit v1.2.3 From f03decbd01957b595045b46885581a110091557f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:27:18 +0200 Subject: arm/arm64: KVM: Rename irq_state to irq_pending The irq_state field on the distributor struct is ambiguous in its meaning; the comment says it's the level of the input, but that doesn't make much sense for edge-triggered interrupts. The code actually uses this state variable to check if the interrupt is in the pending state on the distributor, so clarify the comment and rename the actual variable and accessor methods. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit 227844f53864077ccaefe01d0960fcccc03445ce) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index bcfa9a32753b..b785f708264c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -37,7 +37,7 @@ * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_state vgic + * - VGIC pending interrupts are stored on the vgic.irq_pending vgic * bitmap (this bitmap is updated by both user land ioctls and guest * mmio ops, and other in-kernel peripherals such as the * arch. timers) and indicate the 'wire' state.
@@ -45,8 +45,8 @@ * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_state & dist->irq_enable - * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target + * - PPI: dist->irq_pending & dist->irq_enable + * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target * - irq_spi_target is a 'formatted' version of the GICD_ICFGR * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can @@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); } -static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); } -static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) +static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); } static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) @@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); @@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. 
*/ - vgic_dist_irq_set(vcpu, lr.irq); + vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; @@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) kvm_for_each_vcpu(c, vcpu, kvm) { if (target_cpus & 1) { /* Flag the SGI as pending */ - vgic_dist_irq_set(vcpu, sgi); + vgic_dist_irq_set_pending(vcpu, sgi); dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; pend_shared = vcpu->arch.vgic_cpu.pending_shared; - pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); + pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - pending = vgic_bitmap_get_shared_map(&dist->irq_state); + pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); bitmap_and(pend_shared, pend_shared, @@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) * our emulated gic and can get rid of them. */ if (!sources) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); return true; } @@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) if (vgic_queue_irq(vcpu, 0, irq)) { if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear(vcpu, irq); + vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { vgic_irq_set_active(vcpu, irq); @@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { - int is_edge = vgic_irq_is_edge(vcpu, irq); + int edge_triggered = vgic_irq_is_edge(vcpu, irq); int state = vgic_dist_irq_is_pending(vcpu, irq); /* @@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (is_edge) + if (edge_triggered) return level > state; else return level != state; } -static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, +static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int is_edge, is_level; + int edge_triggered, level_triggered; int enabled; bool ret = true; spin_lock(&dist->lock); vcpu = kvm_get_vcpu(kvm, cpuid); - is_edge = vgic_irq_is_edge(vcpu, irq_num); - is_level = !is_edge; + edge_triggered = vgic_irq_is_edge(vcpu, irq_num); + level_triggered = !edge_triggered; if (!vgic_validate_injection(vcpu, irq_num, level)) { ret = false; @@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); if (level) - vgic_dist_irq_set(vcpu, irq_num); + vgic_dist_irq_set_pending(vcpu, irq_num); else - vgic_dist_irq_clear(vcpu, irq_num); + vgic_dist_irq_clear_pending(vcpu, irq_num); enabled = vgic_irq_is_enabled(vcpu, irq_num); @@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, goto out; } - if (is_level && vgic_irq_is_active(vcpu, irq_num)) { + if 
(level_triggered && vgic_irq_is_active(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. @@ -1466,7 +1466,7 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) + if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; -- cgit v1.2.3 From f9310d00f0611a3bb436aab05c4ab36bedb42106 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 9 Jun 2014 12:55:13 +0200 Subject: arm/arm64: KVM: Rename irq_active to irq_queued We have a special bitmap on the distributor struct to keep track of when level-triggered interrupts are queued on the list registers. This was named irq_active, which is confusing, because the active state of an interrupt as per the GIC spec is a different thing, not specifically related to edge-triggered/level-triggered configurations but rather indicates an interrupt which has been ack'ed but not yet eoi'ed. Rename the bitmap and the corresponding accessor functions to irq_queued to clarify what this is actually used for. Signed-off-by: Christoffer Dall (cherry picked from commit dbf20f9d8105cca531614c8bff9a74351e8e67e7) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b785f708264c..38943ae5c13e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -60,12 +60,12 @@ * the 'line' again. This is achieved as such: * * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_active is set. As long as this bit is set, the line + * bit in irq_queued is set. As long as this bit is set, the line * will be ignored for further interrupts. The interrupt is injected * into the vcpu with the GICH_LR_EOI bit set (generate a * maintenance interrupt on EOI). * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_active. This allow the + * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again. 
*/ @@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); } -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); + return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); } -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); + vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) @@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) vcpu->arch.vgic_cpu.pending_shared); } +static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) +{ + return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); +} + static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) { return le32_to_cpu(*((u32 *)mmio->data)) & mask; @@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_active(vcpu, vlr.irq)) - vgic_irq_clear_active(vcpu, vlr.irq); + if (vgic_irq_is_queued(vcpu, vlr.irq)) + vgic_irq_clear_queued(vcpu, vlr.irq); } } } @@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) { - if (vgic_irq_is_active(vcpu, irq)) + if (!vgic_can_sample_irq(vcpu, irq)) return true; /* level interrupt, already queued */ if (vgic_queue_irq(vcpu, 0, irq)) { @@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) vgic_dist_irq_clear_pending(vcpu, irq); vgic_cpu_irq_clear(vcpu, irq); } else { - vgic_irq_set_active(vcpu, irq); + vgic_irq_set_queued(vcpu, irq); } return true; @@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - vgic_irq_clear_active(vcpu, vlr.irq); + vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); @@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, goto out; } - if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) { + if (!vgic_can_sample_irq(vcpu, irq_num)) { /* * Level interrupt in progress, will be picked up * when EOId. 
-- cgit v1.2.3 From 1b93da9361acd561b3fd71afbd025b8e35b75c92 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:37:33 +0200 Subject: arm/arm64: KVM: vgic: Clear queued flags on unqueue If we unqueue a level-triggered interrupt completely, and the LR does not stick around in the active state (and will therefore no longer generate a maintenance interrupt), then we should clear the queued flag so that the vgic can actually queue this level-triggered interrupt at a later time and deal with its pending state then. Note: This should actually be properly fixed to handle the active state on the distributor. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall (cherry picked from commit cced50c9280ef7ca1af48080707a170efa1adfa0) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 38943ae5c13e..5d681ddcc59f 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * active), then the LR does not hold any useful info and can * be marked as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) + if (!(lr.state & LR_STATE_MASK)) { vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); + } /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); -- cgit v1.2.3 From 86dc33c93a24bdb01ae102380e2ad98c00c4b911 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 21:54:51 +0200 Subject: arm/arm64: KVM: vgic: Improve handling of GICD_I{CS}PENDRn Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled correctly for level-triggered interrupts. The spec states that for level-triggered interrupts, writes to the GICD_ISPENDRn activate the output of a flip-flop which is in turn or'ed with the actual input interrupt signal. Correspondingly, writes to GICD_ICPENDRn simply deactivate the output of that flip-flop, but do not (of course) affect the external input signal. Reads from GICC_IAR will also deactivate the flip-flop output. This requires us to track the state of the level-input separately from the state in the flip-flop. We therefore introduce two new variables on the distributor struct to track these two states. Astute readers may notice that this is introducing more state than required (because an OR of the two states gives you the pending state), but the remaining vgic code uses the pending bitmap for optimized operations to figure out, at the end of the day, if an interrupt is pending or not on the distributor side. Refactoring the code to consider the two state variables in all the places where we currently access the precomputed pending value did not look pretty. Signed-off-by: Christoffer Dall (cherry picked from commit faa1b46c3e9f4d40359aee04ff275eea5f4cae3a) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5d681ddcc59f..bb78526b71e8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -67,6 +67,11 @@ * - When the interrupt is EOIed, the maintenance interrupt fires, * and clears the corresponding bit in irq_queued. This allows the * interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from + * writes to GICD_ISPENDRn and lowering the external input line does not + * cause the interrupt to become inactive in such a situation. + * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become + * inactive as long as the external input line is held high. */ #define VGIC_ADDR_UNDEF (-1) @@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); +} + +static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); +} + +static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *reg; + u32 level_mask; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); + level_mask = (~(*reg)); + + /* Mark both level and edge triggered irqs as pending */ + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + /* Set the soft-pending flag only for level-triggered irqs */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + *reg &= level_mask; + vgic_update_state(vcpu->kvm); return true; } @@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, - vcpu->vcpu_id, offset); + u32 *level_active; + u32 *reg; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { + /* Re-set level triggered level-active interrupts */ + level_active = vgic_bitmap_get_reg(&dist->irq_level, + vcpu->vcpu_id, offset); + reg = vgic_bitmap_get_reg(&dist->irq_pending, + vcpu->vcpu_id, offset); + *reg |= *level_active; + + /* Clear soft-pending flags */ + reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + vgic_update_state(vcpu->kvm); return 
true; } @@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; vgic_set_lr(vcpu, lr, vlr); + /* + * If the IRQ was EOIed it was also ACKed and we + * therefore assume we can clear the soft pending + * state (should it have been set) for this interrupt. + * + * Note: if the IRQ soft pending state was set after + * the IRQ was acked, it actually shouldn't be + * cleared, but we have no way of knowing that unless + * we start trapping ACKs when the soft-pending state + * is set. + */ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* Any additional pending interrupt? */ - if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); level_pending = true; } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); vgic_cpu_irq_clear(vcpu, vlr.irq); } @@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm) static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) { int edge_triggered = vgic_irq_is_edge(vcpu, irq); - int state = vgic_dist_irq_is_pending(vcpu, irq); /* * Only inject an interrupt if: * - edge triggered and we have a rising edge * - level triggered and we change level */ - if (edge_triggered) + if (edge_triggered) { + int state = vgic_dist_irq_is_pending(vcpu, irq); return level > state; - else + } else { + int state = vgic_dist_irq_get_level(vcpu, irq); return level != state; + } } static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, @@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - if (level) + if (level) { + if (level_triggered) + vgic_dist_irq_set_level(vcpu, irq_num); vgic_dist_irq_set_pending(vcpu, irq_num); - else - vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + if (level_triggered) { + vgic_dist_irq_clear_level(vcpu, irq_num); + if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) + vgic_dist_irq_clear_pending(vcpu, irq_num); + } else { + vgic_dist_irq_clear_pending(vcpu, irq_num); + } + } enabled = vgic_irq_is_enabled(vcpu, irq_num); -- cgit v1.2.3 From 543cbd3252398a1e5ccde861703025cc8a32996f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:30:45 +0200 Subject: arm/arm64: KVM: vgic: Fix SGI writes to GICD_I{CS}PENDR0 Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the pending state for SGIs. Make sure the implementation handles this correctly.
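As a standalone illustration of what "handles this correctly" means, a minimal sketch of the SGI-preserving idiom (the helper name is hypothetical; the diff below open-codes this in both the set- and clear-pending handlers):

	/* Keep the 16 SGI pending bits unchanged across a GICD_I[CS]PENDR0
	 * write. 'orig' is the register value captured before the MMIO
	 * access was applied. */
	static void vgic_restore_sgi_bits(u32 *reg, u32 orig, phys_addr_t offset)
	{
		/* Byte offsets 0 and 1 cover IRQs 0-15, i.e. the SGIs. */
		if (offset < 2) {
			*reg &= ~0xffff;	/* discard the guest's write to SGI bits */
			*reg |= orig & 0xffff;	/* restore the pre-write SGI state */
		}
	}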
Signed-off-by: Christoffer Dall (cherry picked from commit 9da48b5502622f9f0e49df957521ec43a0c9f4c1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index bb78526b71e8..a9154523b4f7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - u32 *reg; + u32 *reg, orig; u32 level_mask; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, /* Mark both level and edge triggered irqs as pending */ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); @@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); *reg &= level_mask; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + vgic_update_state(vcpu->kvm); return true; } @@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, phys_addr_t offset) { u32 *level_active; - u32 *reg; + u32 *reg, orig; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); + orig = *reg; vgic_reg_access(mmio, reg, offset, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { @@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id, offset); *reg |= *level_active; + /* Ignore writes to SGIs */ + if (offset < 2) { + *reg &= ~0xffff; + *reg |= orig & 0xffff; + } + /* Clear soft-pending flags */ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, vcpu->vcpu_id, offset); -- cgit v1.2.3 From 3da92293209abc453abe56a4a8956b33f68fd6a9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 14 Jun 2014 22:34:04 +0200 Subject: arm/arm64: KVM: vgic: Clarify and correct vgic documentation The VGIC virtual distributor implementation documentation was written a very long time ago, before the true nature of the beast had been partially absorbed into my bloodstream. Clarify the docs. Plus, it fixes an actual bug. ICFRn, pfff. Signed-off-by: Christoffer Dall (cherry picked from commit 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a9154523b4f7..2369b9e999b0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -36,21 +36,22 @@ * How the whole thing works (courtesy of Christoffer Dall): * * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending - * - VGIC pending interrupts are stored on the vgic.irq_pending vgic - * bitmap (this bitmap is updated by both user land ioctls and guest - * mmio ops, and other in-kernel peripherals such as the - * arch. timers) and indicate the 'wire' state. + * something is pending on the CPU interface. + * - Interrupts that are pending on the distributor are stored on the + * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land + * ioctls and guest mmio ops, and other in-kernel peripherals such as the + * arch. timers). 
* - Every time the bitmap changes, the irq_pending_on_cpu oracle is * recalculated * - To calculate the oracle, we need info for each cpu from * compute_pending_for_cpu, which considers: * - PPI: dist->irq_pending & dist->irq_enable * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ICFGR + * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn * registers, stored on each vcpu. We only keep one bit of * information per interrupt, making sure that only one vcpu can * accept the interrupt. + * - If any of the above state changes, we must recalculate the oracle. * - The same is true when injecting an interrupt, except that we only * consider a single interrupt at a time. The irq_spi_cpu array * contains the target CPU for each SPI. -- cgit v1.2.3 From 1bceb4bdf04ec4fc19f9e36f702f9c95391f5970 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:00 +0100 Subject: KVM: ARM: vgic: plug irq injection race As it stands, nothing prevents userspace from injecting an interrupt before the guest's GIC is actually initialized. This goes unnoticed so far (as everything is pretty much statically allocated), but ends up exploding in a spectacular way once we switch to a more dynamic allocation (the GIC data structure isn't there yet). The fix is to test for the "ready" flag in the VGIC distributor before trying to inject the interrupt. Note that in order to avoid breaking userspace, we have to ignore what is essentially an error. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall (cherry picked from commit 71afaba4a2e98bb7bdeba5078370ab43d46e67a1) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2369b9e999b0..8f253c356fce 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1585,7 +1585,8 @@ out: int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level) { - if (vgic_update_irq_pending(kvm, cpuid, irq_num, level)) + if (likely(vgic_initialized(kvm)) && + vgic_update_irq_pending(kvm, cpuid, irq_num, level)) vgic_kick_vcpus(kvm); return 0; -- cgit v1.2.3 From 6edde8a33306861fdef1c9a5ee162d0a8c3037c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:01 +0100 Subject: arm/arm64: KVM: vgic: switch to dynamic allocation So far, all the VGIC data structures are statically defined by the *maximum* number of vcpus and interrupts it supports. It means that we always have to oversize it to cater for the worst case. Start by changing the data structures to be dynamically sizeable, and allocate them at runtime. The sizes are still very static though.
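To make the new sizing concrete, a worked example under the scheme in the diff that follows (the helper name is hypothetical; VGIC_NR_PRIVATE_IRQS is 32, so a single unsigned long per vcpu holds all private IRQs):

	static size_t vgic_bitmap_bytes(int nr_cpus, int nr_irqs)
	{
		/* One long of private IRQs (0..31) per vcpu, then the shared
		 * IRQs packed behind them. E.g. nr_cpus = 2, nr_irqs = 128 on
		 * a 64-bit host: 2 + BITS_TO_LONGS(96) = 2 + 2 = 4 longs. */
		return (nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS)) *
		       sizeof(unsigned long);
	}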
Signed-off-by: Marc Zyngier (cherry picked from commit c1bfb577addd4867a82c4f235824a315d5afb94a) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 243 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 208 insertions(+), 35 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8f253c356fce..c9c55385ade2 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static void vgic_update_state(struct kvm *kvm); static void vgic_kick_vcpus(struct kvm *kvm); +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); @@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; /* - * struct vgic_bitmap contains unions that provide two views of - * the same data. In one case it is an array of registers of - * u32's, and in the other case it is a bitmap of unsigned - * longs. + * struct vgic_bitmap contains a bitmap made of unsigned longs, but + * extracts u32s out of them. * * This does not work on 64-bit BE systems, because the bitmap access * will store two consecutive 32-bit words with the higher-addressed @@ -124,23 +123,45 @@ static const struct vgic_params *vgic; #define REG_OFFSET_SWIZZLE 0 #endif +static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) +{ + int nr_longs; + + nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + + b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); + if (!b->private) + return -ENOMEM; + + b->shared = b->private + nr_cpus; + + return 0; +} + +static void vgic_free_bitmap(struct vgic_bitmap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; +} + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { offset >>= 2; if (!offset) - return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; else - return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); + return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); } static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, int cpuid, int irq) { if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->percpu[cpuid].reg_ul); + return test_bit(irq, x->private + cpuid); - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); } static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, @@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, unsigned long *reg; if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->percpu[cpuid].reg_ul; + reg = x->private + cpuid; } else { - reg = x->shared.reg_ul; + reg = x->shared; irq -= VGIC_NR_PRIVATE_IRQS; } @@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) { - if (unlikely(cpuid >= VGIC_MAX_CPUS)) - return NULL; - return x->percpu[cpuid].reg_ul; + return x->private + cpuid; } static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) { - return x->shared.reg_ul; + return x->shared; +} + +static int 
vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) +{ + int size; + + size = nr_cpus * VGIC_NR_PRIVATE_IRQS; + size += nr_irqs - VGIC_NR_PRIVATE_IRQS; + + x->private = kzalloc(size, GFP_KERNEL); + if (!x->private) + return -ENOMEM; + + x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); + return 0; +} + +static void vgic_free_bytemap(struct vgic_bytemap *b) +{ + kfree(b->private); + b->private = NULL; + b->shared = NULL; } static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) { - offset >>= 2; - BUG_ON(offset > (VGIC_NR_IRQS / 4)); - if (offset < 8) - return x->percpu[cpuid] + offset; - else - return x->shared + offset - 8; + u32 *reg; + + if (offset < VGIC_NR_PRIVATE_IRQS) { + reg = x->private; + offset += cpuid * VGIC_NR_PRIVATE_IRQS; + } else { + reg = x->shared; + offset -= VGIC_NR_PRIVATE_IRQS; + } + + return reg + (offset / sizeof(u32)); } #define VGIC_CFG_LEVEL 0 @@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) */ vgic_dist_irq_set_pending(vcpu, lr.irq); if (lr.irq < VGIC_NR_SGIS) - dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; + *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; lr.state &= ~LR_STATE_PENDING; vgic_set_lr(vcpu, i, lr); @@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Copy source SGIs from distributor side */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { int shift = 8 * (sgi - min_sgi); - reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; } mmio_data_write(mmio, ~0, reg); @@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, /* Clear pending SGIs on the distributor */ for (sgi = min_sgi; sgi <= max_sgi; sgi++) { u8 mask = reg >> (8 * (sgi - min_sgi)); + u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); if (set) { - if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + if ((*src & mask) != mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + *src |= mask; } else { - if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + if (*src & mask) updated = true; - dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + *src &= ~mask; } } @@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } +static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) +{ + return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; +} + static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) if (target_cpus & 1) { /* Flag the SGI as pending */ vgic_dist_irq_set_pending(vcpu, sgi); - dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; + *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); } @@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm) int c; if (!dist->enabled) { - set_bit(0, &dist->irq_pending_on_cpu); + set_bit(0, dist->irq_pending_on_cpu); return; } kvm_for_each_vcpu(c, vcpu, kvm) { if (compute_pending_for_cpu(vcpu)) { pr_debug("CPU%d has pending interrupts\n", c); - set_bit(c, &dist->irq_pending_on_cpu); + set_bit(c, dist->irq_pending_on_cpu); } } } @@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) int vcpu_id = vcpu->vcpu_id; int c; - sources = dist->irq_sgi_sources[vcpu_id][irq]; + sources = *vgic_get_sgi_sources(dist, vcpu_id, 
irq); for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } - dist->irq_sgi_sources[vcpu_id][irq] = sources; + *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; /* * If the sources bitmap has been cleared it means that we @@ -1332,7 +1384,7 @@ epilog: * us. Claim we don't have anything pending. We'll * adjust that if needed while exiting. */ - clear_bit(vcpu_id, &dist->irq_pending_on_cpu); + clear_bit(vcpu_id, dist->irq_pending_on_cpu); } } @@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Check if we still have something up our sleeve... */ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) - set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) @@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) return 0; - return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); + return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } static void vgic_kick_vcpus(struct kvm *kvm) @@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (level) { vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, &dist->irq_pending_on_cpu); + set_bit(cpuid, dist->irq_pending_on_cpu); } out: @@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->vgic_irq_lr_map); + vgic_cpu->pending_shared = NULL; + vgic_cpu->vgic_irq_lr_map = NULL; +} + +static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); + + if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + kvm_vgic_vcpu_destroy(vcpu); + return -ENOMEM; + } + + return 0; +} + /** * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state * @vcpu: pointer to the vcpu struct @@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +void kvm_vgic_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_destroy(vcpu); + + vgic_free_bitmap(&dist->irq_enabled); + vgic_free_bitmap(&dist->irq_level); + vgic_free_bitmap(&dist->irq_pending); + vgic_free_bitmap(&dist->irq_soft_pend); + vgic_free_bitmap(&dist->irq_queued); + vgic_free_bitmap(&dist->irq_cfg); + vgic_free_bytemap(&dist->irq_priority); + if (dist->irq_spi_target) { + for (i = 0; i < dist->nr_cpus; i++) + vgic_free_bitmap(&dist->irq_spi_target[i]); + } + kfree(dist->irq_sgi_sources); + kfree(dist->irq_spi_cpu); + kfree(dist->irq_spi_target); + kfree(dist->irq_pending_on_cpu); + dist->irq_sgi_sources = NULL; + dist->irq_spi_cpu = NULL; + dist->irq_spi_target = NULL; + dist->irq_pending_on_cpu = NULL; +} + +/* + * Allocate and initialize the various data structures. Must be called + * with kvm->lock held! 
+ */ +static int vgic_init_maps(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int nr_cpus, nr_irqs; + int ret, i; + + nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); + ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); + + if (ret) + goto out; + + dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); + dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); + dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, + GFP_KERNEL); + dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); + if (!dist->irq_sgi_sources || + !dist->irq_spi_cpu || + !dist->irq_spi_target || + !dist->irq_pending_on_cpu) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < nr_cpus; i++) + ret |= vgic_init_bitmap(&dist->irq_spi_target[i], + nr_cpus, nr_irqs); + + if (ret) + goto out; + + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_vcpu_init_maps(vcpu, nr_irqs); + if (ret) { + kvm_err("VGIC: Failed to allocate vcpu memory\n"); + break; + } + } + +out: + if (ret) + kvm_vgic_destroy(kvm); + + return ret; +} + /** * kvm_vgic_init - Initialize global VGIC state before running any VCPUs * @kvm: pointer to the kvm struct @@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + ret = vgic_init_maps(kvm); + if (ret) + kvm_err("Unable to allocate maps\n"); + out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); -- cgit v1.2.3 From 8f11676458a587d6e90f13f46075dc6d526695d8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:02 +0100 Subject: arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS Having a dynamic number of supported interrupts means that we cannot rely on VGIC_NR_SHARED_IRQS being fixed anymore. Instead, make it take the distributor structure as a parameter, so it can return the right value.
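A worked instance of the new accessor, with illustrative numbers: a distributor configured with dist->nr_irqs = 256 makes vgic_nr_shared_irqs() return 256 - VGIC_NR_PRIVATE_IRQS = 224, so the bitmap_and() and find_first_bit() calls in compute_pending_for_cpu() now walk 224 bits rather than a compile-time VGIC_NR_SHARED_IRQS.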
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c9c55385ade2..a0d9dbfe305d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) } } +static int vgic_nr_shared_irqs(struct vgic_dist *dist) +{ + return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long *pending, *enabled, *pend_percpu, *pend_shared; unsigned long pending_private, pending_shared; + int nr_shared = vgic_nr_shared_irqs(dist); int vcpu_id; vcpu_id = vcpu->vcpu_id; @@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) pending = vgic_bitmap_get_shared_map(&dist->irq_pending); enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); + bitmap_and(pend_shared, pending, enabled, nr_shared); bitmap_and(pend_shared, pend_shared, vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - VGIC_NR_SHARED_IRQS); + nr_shared); pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); + pending_shared = find_first_bit(pend_shared, nr_shared); return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < VGIC_NR_SHARED_IRQS); + pending_shared < vgic_nr_shared_irqs(dist)); } /* @@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { + for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } -- cgit v1.2.3 From a96f093df30e848e7200beb24e9ae411dc8cff6a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:03 +0100 Subject: arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS We now have the information about the number of CPU interfaces in the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just rely on KVM_MAX_VCPUS where we don't have the choice. Yet. 
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit fc675e355e705a046df7b635d3f3330c0ad94569) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a0d9dbfe305d..296ee83bc725 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { + for_each_set_bit(c, &sources, dist->nr_cpus) { if (vgic_queue_irq(vcpu, c, irq)) clear_bit(c, &sources); } @@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) + if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; for (i = 0; i < VGIC_NR_IRQS; i++) { @@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS; + nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); -- cgit v1.2.3 From 88792889345a3636a1799e67ae1124b26021ac03 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:04 +0100 Subject: arm/arm64: KVM: vgic: handle out-of-range MMIO accesses Now that we can (almost) dynamically size the number of interrupts, we're facing an interesting issue: We have to evaluate at runtime whether or not an access hits a valid register, based on the sizing of this particular instance of the distributor. Furthermore, the GIC spec says that accessing a reserved register is RAZ/WI. For this, add a new field to our range structure, indicating the number of bits a single interrupt uses. That allows us to find out whether or not the access is in range.
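Two worked instances of the check the diff below introduces (numbers illustrative): GIC_DIST_PRI carries 8 bits per interrupt, so irq = offset * 8 / 8 = offset; with dist->nr_irqs = 64, an access at offset 0x40 resolves to IRQ 64 and is answered RAZ/WI, while offset 0x3f (IRQ 63) is still dispatched to the handler. For a 1-bit-per-interrupt register such as GIC_DIST_ENABLE_SET, offset 0x8 resolves to IRQ 0x8 * 8 / 1 = 64 and is likewise out of range.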
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit c3c918361adcceb816c92b21dd95d2b46fb96a8f) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 56 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 296ee83bc725..1f7f5b65d962 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct mmio_range { phys_addr_t base; unsigned long len; + int bits_per_irq; bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset); }; @@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, + .bits_per_irq = 0, .handle_mmio = handle_mmio_misc, }, { .base = GIC_DIST_IGROUP, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ENABLE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg, }, { .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg, }, { .base = GIC_DIST_PENDING_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg, }, { .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg, }, { .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_NR_IRQS / 8, + .len = VGIC_MAX_IRQS / 8, + .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_DIST_PRI, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg, }, { .base = GIC_DIST_TARGET, - .len = VGIC_NR_IRQS, + .len = VGIC_MAX_IRQS, + .bits_per_irq = 8, .handle_mmio = handle_mmio_target_reg, }, { .base = GIC_DIST_CONFIG, - .len = VGIC_NR_IRQS / 4, + .len = VGIC_MAX_IRQS / 4, + .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg, }, { @@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, return NULL; } +static bool vgic_validate_access(const struct vgic_dist *dist, + const struct mmio_range *range, + unsigned long offset) +{ + int irq; + + if (!range->bits_per_irq) + return true; /* Not an irq-based access */ + + irq = offset * 8 / range->bits_per_irq; + if (irq >= dist->nr_irqs) + return false; + + return true; +} + /** * vgic_handle_mmio - handle an in-kernel MMIO access * @vcpu: pointer to the vcpu performing the access @@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, spin_lock(&vcpu->kvm->arch.vgic.lock); offset = mmio->phys_addr - range->base - base; - updated_state = range->handle_mmio(vcpu, mmio, offset); + if (vgic_validate_access(dist, range, offset)) { + updated_state = range->handle_mmio(vcpu, mmio, offset); + } else { + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + updated_state = false; + } spin_unlock(&vcpu->kvm->arch.vgic.lock); kvm_prepare_mmio(run, mmio); kvm_handle_mmio_return(vcpu, run); -- cgit v1.2.3 From 74a39865da36100bb45556a145ea697e37353ba9 Mon Sep 17 00:00:00 2001 From: Marc 
Zyngier Date: Tue, 8 Jul 2014 12:09:05 +0100 Subject: arm/arm64: KVM: vgic: kill VGIC_NR_IRQS Nuke VGIC_NR_IRQS entirely, now that the distributor instance contains the number of IRQs allocated to this GIC. Also add VGIC_NR_IRQS_LEGACY to preserve the current API. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 1f7f5b65d962..b0e37684f5e8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (VGIC_NR_IRQS >> 5) - 1; + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; vgic_reg_access(mmio, &reg, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_lr vlr; int lr; /* Sanitize the input... */ BUG_ON(sgi_source_id & ~7); BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= VGIC_NR_IRQS); + BUG_ON(irq >= dist->nr_irqs); kvm_debug("Queue IRQ%d\n", irq); @@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vlr = vgic_get_lr(vcpu, lr); - BUG_ON(vlr.irq >= VGIC_NR_IRQS); + BUG_ON(vlr.irq >= dist->nr_irqs); vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } @@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) if (vcpu->vcpu_id >= dist->nr_cpus) return -EBUSY; - for (i = 0; i < VGIC_NR_IRQS; i++) { + for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); @@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm) int ret, i; nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; - nr_irqs = dist->nr_irqs = VGIC_NR_IRQS; + + if (!dist->nr_irqs) + dist->nr_irqs = VGIC_NR_IRQS_LEGACY; + + nr_irqs = dist->nr_irqs; ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); @@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) vgic_set_target_reg(kvm, 0, i); kvm->arch.vgic.ready = true; -- cgit v1.2.3 From 9d4d6eba152e879d1b7e55e814a9c036d3e943a3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:06 +0100 Subject: arm/arm64: KVM: vgic: delay vgic allocation until init time It is now quite easy to delay the allocation of the vgic tables until we actually require it to be up and running (when the first vcpu is kicking around, or someone tries to access the GIC registers). This allows us to allocate memory for the exact number of CPUs we have. As nobody configures the number of interrupts just yet, use a fallback to VGIC_NR_IRQS_LEGACY.
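The resulting ordering, sketched from the diff below (simplified; error paths and locking elided):

	KVM_CREATE_IRQCHIP / KVM_CREATE_DEVICE
	  -> kvm_vgic_create()          no allocation, base addresses left undefined
	vcpus are created
	first vcpu run, or first GIC register access from userspace
	  -> kvm_vgic_init() / vgic_attr_regs_access()
	    -> vgic_init_maps()         sized from atomic_read(&kvm->online_vcpus)
	                                and dist->nr_irqs (VGIC_NR_IRQS_LEGACY if unset)
	    -> kvm_vgic_vcpu_init()     for each vcpu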
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit 4956f2bc1fdee4bc336532f3f34635a8534cedfd) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b0e37684f5e8..dd94dbc3f7f5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to * this vcpu and enable the VGIC for this VCPU */ -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (vcpu->vcpu_id >= dist->nr_cpus) - return -EBUSY; - for (i = 0; i < dist->nr_irqs; i++) { if (i < VGIC_NR_PPIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, @@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) vgic_cpu->nr_lr = vgic->nr_lr; vgic_enable(vcpu); - - return 0; } void kvm_vgic_destroy(struct kvm *kvm) @@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm) int nr_cpus, nr_irqs; int ret, i; - nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS; + if (dist->nr_cpus) /* Already allocated */ + return 0; + + nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); + if (!nr_cpus) /* No vcpus? Can't be good... */ + return -EINVAL; + /* + * If nobody configured the number of interrupts, use the + * legacy one. + */ if (!dist->nr_irqs) dist->nr_irqs = VGIC_NR_IRQS_LEGACY; @@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm) } } + for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) + vgic_set_target_reg(kvm, 0, i); + out: if (ret) kvm_vgic_destroy(kvm); @@ -1867,6 +1874,7 @@ out: */ int kvm_vgic_init(struct kvm *kvm) { + struct kvm_vcpu *vcpu; int ret = 0, i; if (!irqchip_in_kernel(kvm)) @@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } + ret = vgic_init_maps(kvm); + if (ret) { + kvm_err("Unable to allocate maps\n"); + goto out; + } + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); if (ret) { @@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm) goto out; } - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_init(vcpu); kvm->arch.vgic.ready = true; out: + if (ret) + kvm_vgic_destroy(kvm); mutex_unlock(&kvm->lock); return ret; } @@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - ret = vgic_init_maps(kvm); - if (ret) - kvm_err("Unable to allocate maps\n"); - out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); @@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + ret = vgic_init_maps(dev->kvm); + if (ret) + goto out; + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { ret = -EINVAL; goto out; -- cgit v1.2.3 From 9e59676711104dcafee9747b61022d81514260ed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:07 +0100 Subject: arm/arm64: KVM: vgic: make number of irqs a configurable attribute In order to make the number of interrupts configurable, use the new fancy device management API to add 
KVM_DEV_ARM_VGIC_GRP_NR_IRQS as a VGIC configurable attribute. Userspace can now specify the exact size of the GIC (by increments of 32 interrupts). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier (cherry picked from commit a98f26f183801685ef57333de4bafd4bbc692c7c) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index dd94dbc3f7f5..a9255385a868 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return vgic_attr_regs_access(dev, attr, &reg, true); } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_IRQS || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_irqs = val; + + mutex_unlock(&dev->kvm->lock); + + return ret; + } } @@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = put_user(reg, uaddr); break; } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); + break; + } } @@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; return vgic_has_attr_regs(vgic_cpu_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; } return -ENXIO; } -- cgit v1.2.3 From 7c44b1920f8657b36898bc7593f0851f1a987dd1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 25 Sep 2014 18:41:07 +0200 Subject: arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset The sgi values calculated in read_set_clear_sgi_pend_reg() and write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4 with catastrophic results in that subfunctions ended up overwriting memory not allocated for the expected purpose. This showed up as bugs in kfree() and the kernel complaining a lot if you turn on memory debugging.
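To see the corruption mechanism in the fix that follows: the offset into GICD_SPENDSGIR/GICD_CPENDSGIR is already a byte offset, and each 32-bit register covers four SGIs at one byte each. For an access at offset 0x4 the correct result is min_sgi = (0x4 & ~0x3) = 4 (covering SGIs 4-7); the broken code computed (0x4 & ~0x3) * 4 = 16, after which vgic_get_sgi_sources(dist, vcpu_id, 16) indexes past the 16 per-vcpu source bytes, producing exactly the out-of-bounds writes described above.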
This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2 Reported-by: Shannon Zhao Signed-off-by: Christoffer Dall (cherry picked from commit 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8) Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a9255385a868..8e1dc03342c3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg = 0; @@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int sgi; - int min_sgi = (offset & ~0x3) * 4; + int min_sgi = (offset & ~0x3); int max_sgi = min_sgi + 3; int vcpu_id = vcpu->vcpu_id; u32 reg; -- cgit v1.2.3 From 699d6a59f5d9aeddaf74ee4f9f81c51d5860f12c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 12 Sep 2014 15:16:00 +0200 Subject: KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn() Read-only memory ranges may be backed by the zero page, so avoid misidentifying it as a MMIO pfn. This fixes another issue I identified when testing QEMU+KVM_UEFI, where a read to an uninitialized emulated NOR flash brought in the zero page, but mapped as a read-write device region, because kvm_is_mmio_pfn() misidentifies it as a MMIO pfn due to its PG_reserved bit being set. Signed-off-by: Ard Biesheuvel Fixes: b88657674d39 ("ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping") Signed-off-by: Paolo Bonzini (cherry picked from commit 85c8555ff07ef09261bd50d603cd4290cff5a8cc) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 39fad76e973f..0733a2a51d0b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -108,7 +108,7 @@ static bool largepages_enabled = true; bool kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); return true; } -- cgit v1.2.3 From d731c6c9b17b8a6b471981b3e310235049190034 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 19 Sep 2014 09:40:41 +1000 Subject: KVM: correct null pid check in kvm_vcpu_yield_to() Correct a simple mistake of checking the wrong variable before a dereference, resulting in the dereference not being properly protected by rcu_dereference(). Signed-off-by: Sam Bobroff Signed-off-by: Paolo Bonzini (cherry picked from commit 27fbe64bfa63cfb9da025975b59d96568caa2d53) Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0733a2a51d0b..4bdadf1a6754 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1748,7 +1748,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) rcu_read_lock(); pid = rcu_dereference(target->pid); if (pid) - task = get_pid_task(target->pid, PIDTYPE_PID); + task = get_pid_task(pid, PIDTYPE_PID); rcu_read_unlock(); if (!task) return ret; -- cgit v1.2.3
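For reference, the corrected lookup in the last hunk follows the usual RCU snapshot pattern; a minimal sketch (surrounding function body trimmed):

	struct task_struct *task = NULL;
	struct pid *pid;

	rcu_read_lock();
	pid = rcu_dereference(target->pid);	/* snapshot the pid under RCU */
	if (pid)
		/* dereference the snapshot just tested, never target->pid again */
		task = get_pid_task(pid, PIDTYPE_PID);
	rcu_read_unlock();

get_pid_task() takes a reference on the task, so it remains safe to use after rcu_read_unlock(); the bug was mixing the tested snapshot ('pid') with a second, unprotected read of 'target->pid'.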