aboutsummaryrefslogtreecommitdiff
path: root/drivers/staging/rdma/hfi1/user_exp_rcv.c
diff options
context:
space:
mode:
authorMitko Haralanov <mitko.haralanov@intel.com>2016-03-08 11:14:20 -0800
committerDoug Ledford <dledford@redhat.com>2016-03-21 15:54:59 -0400
commit06e0ffa69312ce33484bf5c63aa5fc576fde13a8 (patch)
treec213562cbae79d48313b6131a77290a4b52c81a6 /drivers/staging/rdma/hfi1/user_exp_rcv.c
parent000a830efd370bf93083c7af484ffd84ab7fb21f (diff)
IB/hfi1: Re-factor MMU notification code
The MMU notification code added to the expected receive side has been re-factored and split into it's own file. This was done in order to make the code more general and, therefore, usable by other parts of the driver. The caching behavior remains the same. However, the handling of the RB tree (insertion, deletions, and searching) as well as the MMU invalidation processing is now handled by functions in the mmu_rb.[ch] files. Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Reviewed-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com> Signed-off-by: Jubin John <jubin.john@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/staging/rdma/hfi1/user_exp_rcv.c')
-rw-r--r--drivers/staging/rdma/hfi1/user_exp_rcv.c336
1 files changed, 86 insertions, 250 deletions
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
index fccae508a5d0..c9e05ddd469f 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.c
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -48,6 +48,7 @@
#include "user_exp_rcv.h"
#include "trace.h"
+#include "mmu_rb.h"
struct tid_group {
struct list_head list;
@@ -57,11 +58,9 @@ struct tid_group {
u8 map;
};
-struct mmu_rb_node {
- struct rb_node rbnode;
- unsigned long virt;
+struct tid_rb_node {
+ struct mmu_rb_node mmu;
unsigned long phys;
- unsigned long len;
struct tid_group *grp;
u32 rcventry;
dma_addr_t dma_addr;
@@ -70,16 +69,6 @@ struct mmu_rb_node {
struct page *pages[0];
};
-enum mmu_call_types {
- MMU_INVALIDATE_PAGE = 0,
- MMU_INVALIDATE_RANGE = 1
-};
-
-static const char * const mmu_types[] = {
- "PAGE",
- "RANGE"
-};
-
struct tid_pageset {
u16 idx;
u16 count;
@@ -99,28 +88,21 @@ static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
static inline int mmu_addr_cmp(struct mmu_rb_node *, unsigned long,
unsigned long);
-static struct mmu_rb_node *mmu_rb_search(struct rb_root *, unsigned long);
-static int mmu_rb_insert_by_addr(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static int mmu_rb_insert_by_entry(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_rb_remove_by_addr(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_rb_remove_by_entry(struct hfi1_filedata *, struct rb_root *,
- struct mmu_rb_node *);
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
- unsigned long, unsigned long,
- enum mmu_call_types);
-static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
- unsigned long);
-static inline void mmu_notifier_range_start(struct mmu_notifier *,
- struct mm_struct *,
- unsigned long, unsigned long);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
u32 *, unsigned *, unsigned *);
static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
-static void clear_tid_node(struct hfi1_filedata *, u16, struct mmu_rb_node *);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+ .compare = mmu_addr_cmp,
+ .insert = mmu_rb_insert,
+ .remove = mmu_rb_remove,
+ .invalidate = mmu_rb_invalidate
+};
static inline u32 rcventry2tidinfo(u32 rcventry)
{
@@ -167,11 +149,6 @@ static inline void tid_group_move(struct tid_group *group,
tid_group_add_tail(group, s2);
}
-static struct mmu_notifier_ops mn_opts = {
- .invalidate_page = mmu_notifier_page,
- .invalidate_range_start = mmu_notifier_range_start,
-};
-
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
@@ -185,11 +162,8 @@ int hfi1_user_exp_rcv_init(struct file *fp)
unsigned tidbase;
int i, ret = 0;
- INIT_HLIST_NODE(&fd->mn.hlist);
- spin_lock_init(&fd->rb_lock);
spin_lock_init(&fd->tid_lock);
spin_lock_init(&fd->invalid_lock);
- fd->mn.ops = &mn_opts;
fd->tid_rb_root = RB_ROOT;
if (!uctxt->subctxt_cnt || !fd->subctxt) {
@@ -239,7 +213,7 @@ int hfi1_user_exp_rcv_init(struct file *fp)
* fails, continue but turn off the TID caching for
* all user contexts.
*/
- ret = mmu_notifier_register(&fd->mn, current->mm);
+ ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
if (ret) {
dd_dev_info(dd,
"Failed MMU notifier registration %d\n",
@@ -250,11 +224,11 @@ int hfi1_user_exp_rcv_init(struct file *fp)
}
if (HFI1_CAP_IS_USET(TID_UNMAP)) {
- fd->mmu_rb_insert = mmu_rb_insert_by_entry;
- fd->mmu_rb_remove = mmu_rb_remove_by_entry;
+ fd->mmu_rb_insert = mmu_rb_insert;
+ fd->mmu_rb_remove = mmu_rb_remove;
} else {
- fd->mmu_rb_insert = mmu_rb_insert_by_addr;
- fd->mmu_rb_remove = mmu_rb_remove_by_addr;
+ fd->mmu_rb_insert = hfi1_mmu_rb_insert;
+ fd->mmu_rb_remove = hfi1_mmu_rb_remove;
}
/*
@@ -295,8 +269,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
* The notifier would have been removed when the process'es mm
* was freed.
*/
- if (current->mm && !HFI1_CAP_IS_USET(TID_UNMAP))
- mmu_notifier_unregister(&fd->mn, current->mm);
+ if (!HFI1_CAP_IS_USET(TID_UNMAP))
+ hfi1_mmu_rb_unregister(&fd->tid_rb_root);
kfree(fd->invalid_tids);
@@ -312,19 +286,6 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
list_del_init(&grp->list);
kfree(grp);
}
- spin_lock(&fd->rb_lock);
- if (!RB_EMPTY_ROOT(&fd->tid_rb_root)) {
- struct rb_node *node;
- struct mmu_rb_node *rbnode;
-
- while ((node = rb_first(&fd->tid_rb_root))) {
- rbnode = rb_entry(node, struct mmu_rb_node,
- rbnode);
- rb_erase(&rbnode->rbnode, &fd->tid_rb_root);
- kfree(rbnode);
- }
- }
- spin_unlock(&fd->rb_lock);
hfi1_clear_tids(uctxt);
}
@@ -866,7 +827,7 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
int ret;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
struct rb_root *root = &fd->tid_rb_root;
dma_addr_t phys;
@@ -890,9 +851,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return -EFAULT;
}
- node->virt = vaddr;
+ node->mmu.addr = vaddr;
+ node->mmu.len = npages * PAGE_SIZE;
node->phys = page_to_phys(pages[0]);
- node->len = npages * PAGE_SIZE;
node->npages = npages;
node->rcventry = rcventry;
node->dma_addr = phys;
@@ -900,21 +861,19 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
node->freed = false;
memcpy(node->pages, pages, sizeof(struct page *) * npages);
- spin_lock(&fd->rb_lock);
- ret = fd->mmu_rb_insert(fd, root, node);
- spin_unlock(&fd->rb_lock);
+ ret = fd->mmu_rb_insert(root, &node->mmu);
if (ret) {
hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->virt, node->phys, ret);
+ node->rcventry, node->mmu.addr, node->phys, ret);
pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
PCI_DMA_FROMDEVICE);
kfree(node);
return -EFAULT;
}
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
- trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry,
- npages, node->virt, node->phys, phys);
+ trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+ node->mmu.addr, node->phys, phys);
return 0;
}
@@ -924,7 +883,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
@@ -939,14 +898,11 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
rcventry = tididx + (tidctrl - 1);
- spin_lock(&fd->rb_lock);
node = fd->entry_to_rb[rcventry];
- if (!node || node->rcventry != (uctxt->expected_base + rcventry)) {
- spin_unlock(&fd->rb_lock);
+ if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- }
- fd->mmu_rb_remove(fd, &fd->tid_rb_root, node);
- spin_unlock(&fd->rb_lock);
+ fd->mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
if (grp)
*grp = node->grp;
clear_tid_node(fd, fd->subctxt, node);
@@ -954,13 +910,13 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
}
static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
- struct mmu_rb_node *node)
+ struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->virt, node->phys,
+ node->npages, node->mmu.addr, node->phys,
node->dma_addr);
hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
@@ -970,7 +926,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
*/
flush_wc();
- pci_unmap_single(dd->pcidev, node->dma_addr, node->len,
+ pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);
hfi1_release_user_pages(node->pages, node->npages, true);
@@ -997,216 +953,96 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
list_for_each_entry_safe(grp, ptr, &set->list, list) {
list_del_init(&grp->list);
- spin_lock(&fd->rb_lock);
for (i = 0; i < grp->size; i++) {
if (grp->map & (1 << i)) {
u16 rcventry = grp->base + i;
- struct mmu_rb_node *node;
+ struct tid_rb_node *node;
node = fd->entry_to_rb[rcventry -
uctxt->expected_base];
if (!node || node->rcventry != rcventry)
continue;
- fd->mmu_rb_remove(fd, root, node);
+ fd->mmu_rb_remove(root, &node->mmu);
clear_tid_node(fd, -1, node);
}
}
- spin_unlock(&fd->rb_lock);
}
}
-static inline void mmu_notifier_page(struct mmu_notifier *mn,
- struct mm_struct *mm, unsigned long addr)
-{
- mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE,
- MMU_INVALIDATE_PAGE);
-}
-
-static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
{
- mmu_notifier_mem_invalidate(mn, start, end, MMU_INVALIDATE_RANGE);
-}
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+ struct tid_rb_node *node =
+ container_of(mnode, struct tid_rb_node, mmu);
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
- unsigned long start, unsigned long end,
- enum mmu_call_types type)
-{
- struct hfi1_filedata *fd = container_of(mn, struct hfi1_filedata, mn);
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct rb_root *root = &fd->tid_rb_root;
- struct mmu_rb_node *node;
- unsigned long addr = start;
+ if (node->freed)
+ return 0;
- trace_hfi1_mmu_invalidate(uctxt->ctxt, fd->subctxt, mmu_types[type],
- start, end);
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ node->rcventry, node->npages, node->dma_addr);
+ node->freed = true;
- spin_lock(&fd->rb_lock);
- while (addr < end) {
- node = mmu_rb_search(root, addr);
+ spin_lock(&fdata->invalid_lock);
+ if (fdata->invalid_tid_idx < uctxt->expected_count) {
+ fdata->invalid_tids[fdata->invalid_tid_idx] =
+ rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+ fdata->invalid_tids[fdata->invalid_tid_idx] |=
+ EXP_TID_SET(LEN, node->npages);
+ if (!fdata->invalid_tid_idx) {
+ unsigned long *ev;
- if (!node) {
/*
- * Didn't find a node at this address. However, the
- * range could be bigger than what we have registered
- * so we have to keep looking.
+ * hfi1_set_uevent_bits() sets a user event flag
+ * for all processes. Because calling into the
+ * driver to process TID cache invalidations is
+ * expensive and TID cache invalidations are
+ * handled on a per-process basis, we can
+ * optimize this to set the flag only for the
+ * process in question.
*/
- addr += PAGE_SIZE;
- continue;
- }
-
- /*
- * The next address to be looked up is computed based
- * on the node's starting address. This is due to the
- * fact that the range where we start might be in the
- * middle of the node's buffer so simply incrementing
- * the address by the node's size would result is a
- * bad address.
- */
- addr = node->virt + (node->npages * PAGE_SIZE);
- if (node->freed)
- continue;
-
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fd->subctxt, node->virt,
- node->rcventry, node->npages,
- node->dma_addr);
- node->freed = true;
-
- spin_lock(&fd->invalid_lock);
- if (fd->invalid_tid_idx < uctxt->expected_count) {
- fd->invalid_tids[fd->invalid_tid_idx] =
- rcventry2tidinfo(node->rcventry -
- uctxt->expected_base);
- fd->invalid_tids[fd->invalid_tid_idx] |=
- EXP_TID_SET(LEN, node->npages);
- if (!fd->invalid_tid_idx) {
- unsigned long *ev;
-
- /*
- * hfi1_set_uevent_bits() sets a user event flag
- * for all processes. Because calling into the
- * driver to process TID cache invalidations is
- * expensive and TID cache invalidations are
- * handled on a per-process basis, we can
- * optimize this to set the flag only for the
- * process in question.
- */
- ev = uctxt->dd->events +
- (((uctxt->ctxt -
- uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
- set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
- }
- fd->invalid_tid_idx++;
+ ev = uctxt->dd->events +
+ (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
- spin_unlock(&fd->invalid_lock);
+ fdata->invalid_tid_idx++;
}
- spin_unlock(&fd->rb_lock);
+ spin_unlock(&fdata->invalid_lock);
+ return 0;
}
-static inline int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len)
+static int mmu_addr_cmp(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
{
- if ((addr + len) <= node->virt)
+ if ((addr + len) <= node->addr)
return -1;
- else if (addr >= node->virt && addr < (node->virt + node->len))
+ else if (addr >= node->addr && addr < (node->addr + node->len))
return 0;
else
return 1;
}
-static inline int mmu_entry_cmp(struct mmu_rb_node *node, u32 entry)
-{
- if (entry < node->rcventry)
- return -1;
- else if (entry > node->rcventry)
- return 1;
- else
- return 0;
-}
-
-static struct mmu_rb_node *mmu_rb_search(struct rb_root *root,
- unsigned long addr)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct mmu_rb_node *mnode =
- container_of(node, struct mmu_rb_node, rbnode);
- /*
- * When searching, use at least one page length for size. The
- * MMU notifier will not give us anything less than that. We
- * also don't need anything more than a page because we are
- * guaranteed to have non-overlapping buffers in the tree.
- */
- int result = mmu_addr_cmp(mnode, addr, PAGE_SIZE);
-
- if (result < 0)
- node = node->rb_left;
- else if (result > 0)
- node = node->rb_right;
- else
- return mnode;
- }
- return NULL;
-}
-
-static int mmu_rb_insert_by_entry(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
- fdata->entry_to_rb[node->rcventry - base] = node;
+ fdata->entry_to_rb[tnode->rcventry - base] = tnode;
return 0;
}
-static int mmu_rb_insert_by_addr(struct hfi1_filedata *fdata,
- struct rb_root *root, struct mmu_rb_node *node)
-{
- struct rb_node **new = &root->rb_node, *parent = NULL;
- u32 base = fdata->uctxt->expected_base;
-
- /* Figure out where to put new node */
- while (*new) {
- struct mmu_rb_node *this =
- container_of(*new, struct mmu_rb_node, rbnode);
- int result = mmu_addr_cmp(this, node->virt, node->len);
-
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else if (result > 0)
- new = &((*new)->rb_right);
- else
- return 1;
- }
-
- /* Add new node and rebalance tree. */
- rb_link_node(&node->rbnode, parent, new);
- rb_insert_color(&node->rbnode, root);
-
- fdata->entry_to_rb[node->rcventry - base] = node;
- return 0;
-}
-
-static void mmu_rb_remove_by_entry(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
-{
- u32 base = fdata->uctxt->expected_base;
-
- fdata->entry_to_rb[node->rcventry - base] = NULL;
-}
-
-static void mmu_rb_remove_by_addr(struct hfi1_filedata *fdata,
- struct rb_root *root,
- struct mmu_rb_node *node)
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{
+ struct hfi1_filedata *fdata =
+ container_of(root, struct hfi1_filedata, tid_rb_root);
+ struct tid_rb_node *tnode =
+ container_of(node, struct tid_rb_node, mmu);
u32 base = fdata->uctxt->expected_base;
- fdata->entry_to_rb[node->rcventry - base] = NULL;
- rb_erase(&node->rbnode, root);
+ fdata->entry_to_rb[tnode->rcventry - base] = NULL;
}