summaryrefslogtreecommitdiff
path: root/xen/include/asm-x86/guest_pt.h
blob: 6647ccfb8520aa4ef27b77507e7669982dade833 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
/******************************************************************************
 * xen/asm-x86/guest_pt.h
 *
 * Types and accessors for guest pagetable entries, as distinct from
 * Xen's pagetable types.
 *
 * Users must #define GUEST_PAGING_LEVELS to 2, 3 or 4 before including
 * this file.
 *
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_ASM_GUEST_PT_H
#define _XEN_ASM_GUEST_PT_H

#if !defined(GUEST_PAGING_LEVELS)
#error GUEST_PAGING_LEVELS not defined
#endif

/*
 * Turn a typed guest frame number into a guest physical address by
 * widening the raw frame number to paddr_t and shifting it up by PAGE_SHIFT.
 */
static inline paddr_t gfn_to_paddr(gfn_t gfn)
{
    paddr_t frame = (paddr_t)gfn_x(gfn);

    return frame << PAGE_SHIFT;
}

/*
 * Override get_gfn to work with gfn_t.
 *
 * Any earlier definition of get_gfn is dropped.  The replacement unwraps the
 * typed frame number with gfn_x() and forwards to get_gfn_type(), always
 * passing P2M_ALLOC as the final argument.
 */
#undef get_gfn
#define get_gfn(d, g, t) get_gfn_type((d), gfn_x(g), (t), P2M_ALLOC)

/*
 * Mask covering the reserved bits from superpage alignment.
 *
 * Starts from the low 'bit' bits (i.e. bits [0, bit)) and removes
 * _PAGE_PSE_PAT together with (_PAGE_PSE_PAT - 1).  Assuming _PAGE_PSE_PAT
 * is a single-bit mask (its definition is not in this file), that removes
 * the PAT bit and every bit below it, leaving only the address bits a
 * superpage of that size must have clear.
 */
#define SUPERPAGE_RSVD(bit)                                             \
    (((1ul << (bit)) - 1) & ~(_PAGE_PSE_PAT | (_PAGE_PSE_PAT - 1ul)))

/*
 * Fold a 64-bit value into the 32-bit PSE36 layout: bits 32..40 of 'val'
 * are moved down into bits 13..21, replacing whatever was there.  The
 * result is truncated to 32 bits by the return type.
 */
static inline uint32_t fold_pse36(uint64_t val)
{
    const uint64_t low_field  = 0x1fful << 13;  /* Bits 13..21. */
    const uint64_t high_field = 0x1fful << 32;  /* Bits 32..40. */
    uint64_t kept  = val & ~low_field;
    uint64_t moved = (val & high_field) >> (32 - 13);

    return kept | moved;
}
/*
 * Inverse of the PSE36 fold: bits 13..21 of the 32-bit 'val' are moved up
 * to bits 32..40 of the 64-bit result, and cleared from their old position.
 */
static inline uint64_t unfold_pse36(uint32_t val)
{
    const uint64_t low_field = 0x1fful << 13;   /* Bits 13..21. */
    uint64_t wide = val;
    uint64_t kept = wide & ~low_field;
    uint64_t moved = (wide & low_field) << (32 - 13);

    return kept | moved;
}

/* Types of the guest's page tables and access functions for them */

#if GUEST_PAGING_LEVELS == 2

/* 2-level guests: both table levels hold 1024 entries. */
#define GUEST_L1_PAGETABLE_ENTRIES     1024
#define GUEST_L2_PAGETABLE_ENTRIES     1024

/* Lowest virtual-address bit used to index each level. */
#define GUEST_L1_PAGETABLE_SHIFT         12
#define GUEST_L2_PAGETABLE_SHIFT         22

/* No level-specific reserved bits are defined for 2-level entries. */
#define GUEST_L1_PAGETABLE_RSVD           0
#define GUEST_L2_PAGETABLE_RSVD           0

/* Entries are 32 bits wide; each level gets its own wrapper struct type. */
typedef uint32_t guest_intpte_t;
typedef struct { guest_intpte_t l1; } guest_l1e_t;
typedef struct { guest_intpte_t l2; } guest_l2e_t;

/* printf-style conversion (without the leading '%') for a guest_intpte_t. */
#define PRI_gpte "08x"

/* Frame number of a 2-level L1 entry: the entry shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
{
    uint32_t raw = gl1e.l1;

    return _gfn(raw >> PAGE_SHIFT);
}
/* Frame number of a 2-level L2 entry: the entry shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
{
    uint32_t raw = gl2e.l2;

    return _gfn(raw >> PAGE_SHIFT);
}

/* Flag bits of a 2-level L1 entry: its low 12 bits. */
static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
{
    const u32 flag_mask = 0xfff;

    return gl1e.l1 & flag_mask;
}
/* Flag bits of a 2-level L2 entry: its low 12 bits. */
static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
{
    const u32 flag_mask = 0xfff;

    return gl2e.l2 & flag_mask;
}

/* The 2-level variant reports a pkey of 0 for every L1 entry. */
static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e)
{
    const u32 no_pkey = 0;

    return no_pkey;
}
/* The 2-level variant reports a pkey of 0 for every L2 entry. */
static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e)
{
    const u32 no_pkey = 0;

    return no_pkey;
}

/*
 * Build a 2-level L1 entry from a frame number and flags.  The combined
 * value is stored into the 32-bit entry field.
 */
static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l1e_t gl1e = { .l1 = (gfn_x(gfn) << PAGE_SHIFT) | flags };

    return gl1e;
}
/*
 * Build a 2-level L2 entry from a frame number and flags.  The combined
 * value is stored into the 32-bit entry field.
 */
static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l2e_t gl2e = { .l2 = (gfn_x(gfn) << PAGE_SHIFT) | flags };

    return gl2e;
}

/*
 * Index of the L1 / L2 entry covering virtual address _va in a 2-level
 * guest: shift out the bits below the level, then mask to the table size
 * (which is a power of two, so ENTRIES - 1 is an all-ones index mask).
 */
#define guest_l1_table_offset(_va)                                           \
    (((_va) >> GUEST_L1_PAGETABLE_SHIFT) & (GUEST_L1_PAGETABLE_ENTRIES - 1))
#define guest_l2_table_offset(_va)                                           \
    (((_va) >> GUEST_L2_PAGETABLE_SHIFT) & (GUEST_L2_PAGETABLE_ENTRIES - 1))

#else /* GUEST_PAGING_LEVELS != 2 */

#if GUEST_PAGING_LEVELS == 3

/* 3-level (PAE) guests: 512-entry L1/L2 tables and a 4-entry L3 table. */
#define GUEST_L1_PAGETABLE_ENTRIES      512
#define GUEST_L2_PAGETABLE_ENTRIES      512
#define GUEST_L3_PAGETABLE_ENTRIES        4

/* Lowest virtual-address bit used to index each level. */
#define GUEST_L1_PAGETABLE_SHIFT         12
#define GUEST_L2_PAGETABLE_SHIFT         21
#define GUEST_L3_PAGETABLE_SHIFT         30

/*
 * Level-specific reserved bits.  L1/L2 reserve bits 52..62; L3 reserves
 * bits 52..63 plus the listed flag bits.
 */
#define GUEST_L1_PAGETABLE_RSVD            0x7ff0000000000000ul
#define GUEST_L2_PAGETABLE_RSVD            0x7ff0000000000000ul
#define GUEST_L3_PAGETABLE_RSVD                                      \
    (0xfff0000000000000ul | _PAGE_GLOBAL | _PAGE_PSE | _PAGE_DIRTY | \
     _PAGE_ACCESSED | _PAGE_USER | _PAGE_RW)

#else /* GUEST_PAGING_LEVELS == 4 */

/* 4-level guests: every table level holds 512 entries. */
#define GUEST_L1_PAGETABLE_ENTRIES      512
#define GUEST_L2_PAGETABLE_ENTRIES      512
#define GUEST_L3_PAGETABLE_ENTRIES      512
#define GUEST_L4_PAGETABLE_ENTRIES      512

/* Lowest virtual-address bit used to index each level. */
#define GUEST_L1_PAGETABLE_SHIFT         12
#define GUEST_L2_PAGETABLE_SHIFT         21
#define GUEST_L3_PAGETABLE_SHIFT         30
#define GUEST_L4_PAGETABLE_SHIFT         39

/* Level-specific reserved bits: none for L1..L3, _PAGE_PSE for L4. */
#define GUEST_L1_PAGETABLE_RSVD            0
#define GUEST_L2_PAGETABLE_RSVD            0
#define GUEST_L3_PAGETABLE_RSVD            0
/* NB L4e._PAGE_GLOBAL is reserved for AMD, but ignored for Intel. */
#define GUEST_L4_PAGETABLE_RSVD            _PAGE_PSE

#endif

/*
 * For 3- and 4-level guests the guest entry types are plain aliases of the
 * corresponding lN_pgentry_t / intpte_t types; the L4 type only exists in
 * 4-level builds.
 */
typedef l1_pgentry_t guest_l1e_t;
typedef l2_pgentry_t guest_l2e_t;
typedef l3_pgentry_t guest_l3e_t;
#if GUEST_PAGING_LEVELS >= 4
typedef l4_pgentry_t guest_l4e_t;
#endif
typedef intpte_t guest_intpte_t;

/* printf-style conversion (without the leading '%') for a guest_intpte_t. */
#define PRI_gpte "016"PRIx64

/* Frame number of an L1 entry: l1e_get_paddr() shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
{
    gfn_t gfn = _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT);

    return gfn;
}
/* Frame number of an L2 entry: l2e_get_paddr() shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
{
    gfn_t gfn = _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT);

    return gfn;
}
/* Frame number of an L3 entry: l3e_get_paddr() shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
{
    gfn_t gfn = _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT);

    return gfn;
}
#if GUEST_PAGING_LEVELS >= 4
/* Frame number of an L4 entry: l4e_get_paddr() shifted right by PAGE_SHIFT. */
static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
{
    gfn_t gfn = _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT);

    return gfn;
}
#endif

/* Flags of a guest L1 entry, as reported by l1e_get_flags(). */
static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
{
    u32 flags = l1e_get_flags(gl1e);

    return flags;
}
/* Flags of a guest L2 entry, as reported by l2e_get_flags(). */
static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
{
    u32 flags = l2e_get_flags(gl2e);

    return flags;
}
/* Flags of a guest L3 entry, as reported by l3e_get_flags(). */
static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
{
    u32 flags = l3e_get_flags(gl3e);

    return flags;
}
#if GUEST_PAGING_LEVELS >= 4
/* Flags of a guest L4 entry, as reported by l4e_get_flags(). */
static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
{
    u32 flags = l4e_get_flags(gl4e);

    return flags;
}
#endif

/* pkey of a guest L1 entry, as reported by l1e_get_pkey(). */
static inline u32 guest_l1e_get_pkey(guest_l1e_t gl1e)
{
    u32 pkey = l1e_get_pkey(gl1e);

    return pkey;
}
/* pkey of a guest L2 entry, as reported by l2e_get_pkey(). */
static inline u32 guest_l2e_get_pkey(guest_l2e_t gl2e)
{
    u32 pkey = l2e_get_pkey(gl2e);

    return pkey;
}
/* pkey of a guest L3 entry, as reported by l3e_get_pkey(). */
static inline u32 guest_l3e_get_pkey(guest_l3e_t gl3e)
{
    u32 pkey = l3e_get_pkey(gl3e);

    return pkey;
}

/* Build a guest L1 entry from a frame number and flags via l1e_from_pfn(). */
static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l1e_t gl1e = l1e_from_pfn(gfn_x(gfn), flags);

    return gl1e;
}
/* Build a guest L2 entry from a frame number and flags via l2e_from_pfn(). */
static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l2e_t gl2e = l2e_from_pfn(gfn_x(gfn), flags);

    return gl2e;
}
/* Build a guest L3 entry from a frame number and flags via l3e_from_pfn(). */
static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l3e_t gl3e = l3e_from_pfn(gfn_x(gfn), flags);

    return gl3e;
}
#if GUEST_PAGING_LEVELS >= 4
/* Build a guest L4 entry from a frame number and flags via l4e_from_pfn(). */
static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
{
    guest_l4e_t gl4e = l4e_from_pfn(gfn_x(gfn), flags);

    return gl4e;
}
#endif

/*
 * For 3- and 4-level guests the table-index helpers are straight aliases of
 * the lN_table_offset() ones.  Note that guest_l4_table_offset is defined
 * here for 3-level builds too, even though no guest_l4e_t exists there.
 */
#define guest_l1_table_offset(a) l1_table_offset(a)
#define guest_l2_table_offset(a) l2_table_offset(a)
#define guest_l3_table_offset(a) l3_table_offset(a)
#define guest_l4_table_offset(a) l4_table_offset(a)

#endif /* GUEST_PAGING_LEVELS != 2 */

/*
 * Mask of the GFNs covered by an L2 or L3 superpage.
 *
 * An L2 superpage spans one full L1 table's worth of frames; an L3
 * superpage spans (L2 entries * L1 entries) frames.  Each mask is that
 * count minus one.  GUEST_L3_GFN_MASK is also defined for 2-level builds,
 * where there is no L3 level.
 */
#define GUEST_L2_GFN_MASK (GUEST_L1_PAGETABLE_ENTRIES - 1)
#define GUEST_L3_GFN_MASK \
    ((GUEST_L2_PAGETABLE_ENTRIES * GUEST_L1_PAGETABLE_ENTRIES) - 1)


/* Which pagetable features are supported on this vcpu? */

/*
 * May this vcpu use L2 superpages (i.e. is L2 _PAGE_PSE honoured)?
 *
 * Each condition below is sufficient on its own; only 2-level HVM guests
 * with paging enabled depend on CR4.PSE.
 */
static always_inline bool guest_can_use_l2_superpages(const struct vcpu *v)
{
    /*
     * PV guests use Xen's paging settings.  Being 4-level, 2M
     * superpages are unconditionally supported.
     */
    if ( is_pv_vcpu(v) )
        return true;

    /* HVM guests in PAE or long mode always honour L2 _PAGE_PSE. */
    if ( GUEST_PAGING_LEVELS != 2 )
        return true;

    /* PSE is also used in the dummy PT for vcpus with CR0.PG cleared. */
    if ( !hvm_paging_enabled(v) )
        return true;

    /* 2-level HVM guest with paging on: governed by CR4.PSE. */
    return v->arch.hvm.guest_cr[4] & X86_CR4_PSE;
}

/*
 * May guests of this domain use 1GB (L3) superpages?
 *
 * There are no control register settings for the hardware pagewalk on the
 * subject of 1G superpages, so the answer depends only on the paging mode
 * and on the hardware.
 */
static always_inline bool guest_can_use_l3_superpages(const struct domain *d)
{
    /* Only 4-level guests qualify. */
    if ( GUEST_PAGING_LEVELS < 4 )
        return false;

    /*
     * Shadow pagetables don't support 1GB superpages at all, and will always
     * treat L3 _PAGE_PSE as reserved.
     */
    if ( !paging_mode_hap(d) )
        return false;

    /*
     * With HAP, if the guest constructs a 1GB superpage on capable hardware,
     * it will function irrespective of whether the feature is advertised.
     * Xen's model of performing a pagewalk should match.
     */
    return cpu_has_page1gb;
}

/*
 * May guests of this domain use PSE36 superpages?
 *
 * Only called in the context of 2-level guests, after
 * guest_can_use_l2_superpages() has indicated true.
 */
static inline bool guest_can_use_pse36(const struct domain *d)
{
    /*
     * Shadow pagetables don't support PSE36 superpages at all, and will
     * always treat them as reserved.
     */
    if ( !paging_mode_hap(d) )
        return false;

    /*
     * With HAP, once L2 superpages are active, there are no control
     * register settings for the hardware pagewalk on the subject of PSE36.
     * If the guest constructs a PSE36 superpage on capable hardware, it will
     * function irrespective of whether the feature is advertised.  Xen's
     * model of performing a pagewalk should match.
     */
    return cpu_has_pse36;
}

/* Is NX in effect for this vcpu's pagetable entries? */
static always_inline bool guest_nx_enabled(const struct vcpu *v)
{
    /* NX has no effect without CR4.PAE, i.e. never for 2-level guests. */
    if ( GUEST_PAGING_LEVELS == 2 )
        return false;

    /* PV guests can't control EFER.NX, and inherit Xen's choice. */
    if ( is_pv_vcpu(v) )
        return cpu_has_nx;

    return hvm_nx_enabled(v);
}

/* Is CR0.WP in effect for this vcpu? */
static always_inline bool guest_wp_enabled(const struct vcpu *v)
{
    /* PV guests can't control CR0.WP, and it is unconditionally set by Xen. */
    if ( is_pv_vcpu(v) )
        return true;

    return hvm_wp_enabled(v);
}

/* SMEP state for this vcpu: always false for PV, else hvm_smep_enabled(). */
static always_inline bool guest_smep_enabled(const struct vcpu *v)
{
    if ( is_pv_vcpu(v) )
        return false;

    return hvm_smep_enabled(v);
}

/* SMAP state for this vcpu: always false for PV, else hvm_smap_enabled(). */
static always_inline bool guest_smap_enabled(const struct vcpu *v)
{
    if ( is_pv_vcpu(v) )
        return false;

    return hvm_smap_enabled(v);
}

/* PKU state for this vcpu: always false for PV, else hvm_pku_enabled(). */
static always_inline bool guest_pku_enabled(const struct vcpu *v)
{
    if ( is_pv_vcpu(v) )
        return false;

    return hvm_pku_enabled(v);
}

/* Helpers for identifying whether guest entries have reserved bits set. */

/*
 * Bits reserved in every guest entry regardless of level: the physical
 * address bits at or above the domain's maxphysaddr, plus the NX bit
 * whenever NX is not enabled for this vcpu.
 */
static always_inline uint64_t guest_rsvd_bits(const struct vcpu *v)
{
    /* Address bits within PADDR_MASK that the guest's maxphysaddr excludes. */
    uint64_t rsvd = PADDR_MASK &
                    ~((1ul << v->domain->arch.cpuid->extd.maxphysaddr) - 1);

    /* Without EFER.NX, the NX bit itself is reserved. */
    if ( !guest_nx_enabled(v) )
        rsvd |= put_pte_flags(_PAGE_NX_BIT);

    return rsvd;
}

/* Does this L1 entry have any common or L1-specific reserved bit set? */
static always_inline bool guest_l1e_rsvd_bits(const struct vcpu *v,
                                              guest_l1e_t l1e)
{
    uint64_t rsvd_mask = guest_rsvd_bits(v) | GUEST_L1_PAGETABLE_RSVD;

    return l1e.l1 & rsvd_mask;
}

/*
 * Does this L2 entry have any reserved bit set?
 *
 * Two checks: bits reserved in every L2 entry, and - for superpage
 * entries only - the address bits that must be clear for a superpage.
 */
static always_inline bool guest_l2e_rsvd_bits(const struct vcpu *v,
                                              guest_l2e_t l2e)
{
    uint64_t rsvd_bits = guest_rsvd_bits(v);
    uint64_t always_rsvd = rsvd_bits | GUEST_L2_PAGETABLE_RSVD;

    /* _PAGE_PSE itself is reserved when L2 superpages are unavailable. */
    if ( !guest_can_use_l2_superpages(v) )
        always_rsvd |= _PAGE_PSE;

    if ( l2e.l2 & always_rsvd )
        return true;

    /* Everything below applies to superpage entries only. */
    if ( !(l2e.l2 & _PAGE_PSE) )
        return false;

    /* PSE36 tops out at 40 bits of address width. */
    if ( GUEST_PAGING_LEVELS == 2 && guest_can_use_pse36(v->domain) )
        return l2e.l2 & fold_pse36(rsvd_bits | (1ul << 40));

    return l2e.l2 & SUPERPAGE_RSVD(GUEST_L2_PAGETABLE_SHIFT);
}

#if GUEST_PAGING_LEVELS >= 3
/*
 * Does this L3 entry have any reserved bit set?
 *
 * Checks the bits reserved in every L3 entry, then - for superpage entries
 * only - the address bits that must be clear for an L3 superpage.
 */
static always_inline bool guest_l3e_rsvd_bits(const struct vcpu *v,
                                              guest_l3e_t l3e)
{
    uint64_t always_rsvd = guest_rsvd_bits(v) | GUEST_L3_PAGETABLE_RSVD;

    /* _PAGE_PSE itself is reserved when L3 superpages are unavailable. */
    if ( !guest_can_use_l3_superpages(v->domain) )
        always_rsvd |= _PAGE_PSE;

    if ( l3e.l3 & always_rsvd )
        return true;

    return (l3e.l3 & _PAGE_PSE) &&
           (l3e.l3 & SUPERPAGE_RSVD(GUEST_L3_PAGETABLE_SHIFT));
}

#if GUEST_PAGING_LEVELS >= 4
/*
 * Does this L4 entry have any reserved bit set?  _PAGE_GLOBAL is added to
 * the reserved set only when the domain's CPUID vendor is AMD.
 */
static always_inline bool guest_l4e_rsvd_bits(const struct vcpu *v,
                                              guest_l4e_t l4e)
{
    uint64_t rsvd_mask = guest_rsvd_bits(v) | GUEST_L4_PAGETABLE_RSVD;

    if ( v->domain->arch.cpuid->x86_vendor == X86_VENDOR_AMD )
        rsvd_mask |= _PAGE_GLOBAL;

    return l4e.l4 & rsvd_mask;
}
#endif /* GUEST_PAGING_LEVELS >= 4 */
#endif /* GUEST_PAGING_LEVELS >= 3 */

/* Type used for recording a walk through guest pagetables.  It is
 * filled in by the pagetable walk function, and also used as a cache
 * for later walks.  When we encounter a superpage l2e, we fabricate an
 * l1e for propagation to the shadow (for splintering guest superpages
 * into many shadow l1 entries).
 *
 * Which members exist depends on GUEST_PAGING_LEVELS: l3e needs >= 3
 * levels; l4e, l4mfn and l3mfn need >= 4 levels.  Note that a 3-level
 * build therefore records an l3e but no l3mfn. */
typedef struct guest_pagetable_walk walk_t;
struct guest_pagetable_walk
{
    unsigned long va;           /* Address we were looking for */
#if GUEST_PAGING_LEVELS >= 3
#if GUEST_PAGING_LEVELS >= 4
    guest_l4e_t l4e;            /* Guest's level 4 entry */
#endif
    guest_l3e_t l3e;            /* Guest's level 3 entry */
#endif
    guest_l2e_t l2e;            /* Guest's level 2 entry */
    /* l1e and el1e share storage; el1e is what guest_walk_to_gfn() and
     * print_gw() read in 2-level builds. */
    union
    {
        guest_l1e_t l1e;        /* Guest's level 1 entry (or fabrication). */
        uint64_t   el1e;        /* L2 PSE36 superpages wider than 32 bits. */
    };
#if GUEST_PAGING_LEVELS >= 4
    mfn_t l4mfn;                /* MFN that the level 4 entry was in */
    mfn_t l3mfn;                /* MFN that the level 3 entry was in */
#endif
    mfn_t l2mfn;                /* MFN that the level 2 entry was in */
    mfn_t l1mfn;                /* MFN that the level 1 entry was in */

    uint32_t pfec;              /* Accumulated PFEC_* error code from walk. */
};

/*
 * Given a walk_t, translate the gw->va into the guest's notion of the
 * corresponding frame number.
 *
 * Returns INVALID_GFN when the recorded L1 entry is not present.  2-level
 * builds read the frame from the extended el1e field instead of l1e.
 */
static inline gfn_t guest_walk_to_gfn(const walk_t *gw)
{
    if ( guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT )
    {
        if ( GUEST_PAGING_LEVELS == 2 )
            return _gfn(gw->el1e >> PAGE_SHIFT);

        return guest_l1e_get_gfn(gw->l1e);
    }

    return INVALID_GFN;
}

/*
 * Given a walk_t, translate the gw->va into the guest's notion of the
 * corresponding physical address: the walk's frame number shifted up by
 * PAGE_SHIFT, combined with the in-page offset of gw->va.
 *
 * Returns INVALID_PADDR when the walk has no valid frame number.
 */
static inline paddr_t guest_walk_to_gpa(const walk_t *gw)
{
    gfn_t gfn = guest_walk_to_gfn(gw);

    if ( !gfn_eq(gfn, INVALID_GFN) )
        return (gfn_x(gfn) << PAGE_SHIFT) | (gw->va & ~PAGE_MASK);

    return INVALID_PADDR;
}

/*
 * Given a walk_t from a successful walk, return the page-order of the
 * page or superpage that the virtual address is in.
 *
 * The order is the mapping level's shift minus PAGE_SHIFT.  An L3 superpage
 * takes precedence over an L2 one, which takes precedence over a plain L1
 * mapping.
 */
static inline unsigned int guest_walk_to_page_order(const walk_t *gw)
{
    unsigned int level_shift = GUEST_L1_PAGETABLE_SHIFT;

    /*
     * This is only valid for successful walks - otherwise the
     * PSE bits might be invalid.
     */
    ASSERT(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT);

    if ( guest_l2e_get_flags(gw->l2e) & _PAGE_PSE )
        level_shift = GUEST_L2_PAGETABLE_SHIFT;
#if GUEST_PAGING_LEVELS >= 3
    /* Checked last so that it overrides the L2 result. */
    if ( guest_l3e_get_flags(gw->l3e) & _PAGE_PSE )
        level_shift = GUEST_L3_PAGETABLE_SHIFT;
#endif

    return level_shift - PAGE_SHIFT;
}


/*
 * Walk the guest pagetables, after the manner of a hardware walker.
 *
 * Inputs (matching the prototype below): a vcpu, a p2m, a virtual
 *         address, a walk_t to fill, a pagefault error code (passed by
 *         value), the GFN and MFN of the guest's top-level pagetable,
 *         and a mapping of the guest's top-level pagetable.
 *
 * We walk the vcpu's guest pagetables, filling the walk_t with what we
 * see and adding any Accessed and Dirty bits that are needed in the
 * guest entries.  Using the pagefault code, we check the permissions as
 * we go.  For the purposes of reading pagetables we treat all non-RAM
 * memory as containing zeroes.
 *
 * Returns a boolean indicating success or failure.  walk_t.pfec contains
 * the accumulated error code on failure.
 */

/*
 * Macro-fu so you can call guest_walk_tables() and get the right one.
 *
 * GPT_RENAME() goes through GPT_RENAME2() so that GUEST_PAGING_LEVELS is
 * expanded before token pasting; guest_walk_tables thus becomes
 * guest_walk_tables_<N>_levels, with <N> the value of GUEST_PAGING_LEVELS.
 */
#define GPT_RENAME2(_n, _l) _n ## _ ## _l ## _levels
#define GPT_RENAME(_n, _l) GPT_RENAME2(_n, _l)
#define guest_walk_tables GPT_RENAME(guest_walk_tables, GUEST_PAGING_LEVELS)

/* Declaration only; the definition is not part of this header. */
bool
guest_walk_tables(const struct vcpu *v, struct p2m_domain *p2m,
                  unsigned long va, walk_t *gw, uint32_t pfec,
                  gfn_t top_gfn, mfn_t top_mfn, void *top_map);

/*
 * Pretty-print the contents of a guest-walk.
 *
 * Emits, via gprintk() at XENLOG_INFO, the target address, then each
 * recorded entry from the top level down with the MFN it was read from
 * (where recorded), and finally the accumulated pfec, both in hex and as
 * a flag string.
 */
static inline void print_gw(const walk_t *gw)
{
    gprintk(XENLOG_INFO, "GUEST WALK TO %p\n", _p(gw->va));

#if GUEST_PAGING_LEVELS >= 4   /* 4-level: l4e and l3e, each with its MFN. */
    gprintk(XENLOG_INFO, "   l4e=%" PRI_gpte " l4mfn=%" PRI_mfn "\n",
            gw->l4e.l4, mfn_x(gw->l4mfn));
    gprintk(XENLOG_INFO, "   l3e=%" PRI_gpte " l3mfn=%" PRI_mfn "\n",
            gw->l3e.l3, mfn_x(gw->l3mfn));
#elif GUEST_PAGING_LEVELS >= 3 /* PAE only: l3e, with no MFN recorded. */
    gprintk(XENLOG_INFO, "   l3e=%" PRI_gpte "\n", gw->l3e.l3);
#endif

    /* All levels record an l2e. */
    gprintk(XENLOG_INFO, "   l2e=%" PRI_gpte " l2mfn=%" PRI_mfn "\n",
            gw->l2e.l2, mfn_x(gw->l2mfn));

#if GUEST_PAGING_LEVELS == 2
    /* 2-level walks print the extended (64-bit) L1 value. */
    gprintk(XENLOG_INFO, "  el1e=%08" PRIx64 " l1mfn=%" PRI_mfn "\n",
            gw->el1e, mfn_x(gw->l1mfn));
#else
    gprintk(XENLOG_INFO, "   l1e=%" PRI_gpte " l1mfn=%" PRI_mfn "\n",
            gw->l1e.l1, mfn_x(gw->l1mfn));
#endif

    /* One character per PFEC_* bit; lower case or '-' means the bit is clear. */
    gprintk(XENLOG_INFO, "   pfec=%02x[%c%c%c%c%c%c]\n", gw->pfec,
            gw->pfec & PFEC_prot_key     ? 'K' : '-',
            gw->pfec & PFEC_insn_fetch   ? 'I' : 'd',
            gw->pfec & PFEC_reserved_bit ? 'R' : '-',
            gw->pfec & PFEC_user_mode    ? 'U' : 's',
            gw->pfec & PFEC_write_access ? 'W' : 'r',
            gw->pfec & PFEC_page_present ? 'P' : '-');
}

#endif /* _XEN_ASM_GUEST_PT_H */