/******************************************************************************
 * arch/x86/mm/mem_paging.c
 *
 * Memory paging support.
 *
 * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */


#include <asm/p2m.h>
#include <xen/guest_access.h>
#include <xen/vm_event.h>
#include <xsm/xsm.h>

#include "mm-locks.h"

/*
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
 * @d: guest domain
 * @gfn: guest page to drop
 *
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
 * released by the guest. The pager is supposed to drop its reference to the
 * gfn.
 */
void p2m_mem_paging_drop_page(struct domain *d, gfn_t gfn, p2m_type_t p2mt)
{
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };

    /*
     * We allow no ring in this unique case, because it won't affect
     * correctness of the guest execution at this point.  If this is the only
     * page that happens to be paged-out, we'll be okay, but it's likely the
     * guest will crash shortly anyway.
     */
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    if ( rc < 0 )
        return;

    /* Send release notification to pager */
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;

    /* Update stats, unless the page has not been evicted yet */
    if ( p2mt != p2m_ram_paging_out )
        atomic_dec(&d->paged_pages);
    else
        /* Evict will fail now, tag this request for pager */
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

    vm_event_put_request(d, d->vm_event_paging, &req);
}
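
/*
 * Illustrative sketch (not part of this file): how a pager consuming the
 * vm_event ring might act on the notification sent above.  The request
 * fields match the public vm_event interface used in this file;
 * pager_discard_saved_page() is a hypothetical helper standing in for the
 * pager's backing store.
 *
 *     if ( req.reason == VM_EVENT_REASON_MEM_PAGING &&
 *          (req.u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
 *     {
 *         // The guest released the gfn: forget any saved contents.  If
 *         // MEM_PAGING_EVICT_FAIL is also set, a pending evict of this
 *         // gfn is expected to fail.
 *         pager_discard_saved_page(req.u.mem_paging.gfn);
 *     }
 */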

/*
 * p2m_mem_paging_populate - Tell pager to populate a paged page
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * p2m_mem_paging_populate() will notify the pager that a page in any of the
 * paging states needs to be written back into the guest.
 * This function needs to be called whenever gfn_to_mfn() returns any of the p2m
 * paging types because the gfn may not be backed by a mfn.
 *
 * The gfn can be in any of the paging states, but the pager needs only be
 * notified when the gfn is in the paging-out path (paging_out or paged).  This
 * function may be called more than once from several vcpus. If the vcpu belongs
 * to the guest, the vcpu must be stopped and the pager notified that the vcpu
 * was stopped. The pager needs to handle several requests for the same gfn.
 *
 * If the gfn is not in the paging-out path and the vcpu does not belong to the
 * guest, nothing needs to be done and the function assumes that a request was
 * already sent to the pager. In this case the caller has to retry until the
 * gfn is fully paged in again.
 */
void p2m_mem_paging_populate(struct domain *d, gfn_t gfn)
{
    struct vcpu *v = current;
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    /* We're paging. There should be a ring. */
    if ( rc == -EOPNOTSUPP )
    {
        gdprintk(XENLOG_ERR, "%pd paging gfn %"PRI_gfn" yet no ring in place\n",
                 d, gfn_x(gfn));
        /* Prevent the vcpu from faulting repeatedly on the same gfn */
        if ( v->domain == d )
            vcpu_pause_nosync(v);
        domain_crash(d);
        return;
    }
    else if ( rc < 0 )
        return;

    /* Fix p2m mapping */
    gfn_lock(p2m, gfn, 0);
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    /* Allow only nominated or evicted pages to enter page-in path */
    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
    {
        /* Evict will fail now, tag this request for pager */
        if ( p2mt == p2m_ram_paging_out )
            req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

        rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
    }
    gfn_unlock(p2m, gfn, 0);
    if ( rc < 0 )
        goto out_cancel;

    /* Pause domain if request came from guest and gfn has paging type */
    if ( p2m_is_paging(p2mt) && v->domain == d )
    {
        vm_event_vcpu_pause(v);
        req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
    }
    /* No need to inform pager if the gfn is not in the page-out path */
    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
    {
        /* gfn is already on its way back and vcpu is not paused */
    out_cancel:
        vm_event_cancel_slot(d, d->vm_event_paging);
        return;
    }

    /* Send request to pager */
    req.u.mem_paging.p2mt = p2mt;
    req.vcpu_id = v->vcpu_id;

    vm_event_put_request(d, d->vm_event_paging, &req);
}
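
/*
 * Illustrative sketch (not part of this file): the caller-side pattern the
 * comment above describes.  Code that translates a gfn and finds one of the
 * paging types is expected to drop what it holds, kick the pager and retry
 * later, e.g.:
 *
 *     mfn = get_gfn(d, gfn_x(gfn), &p2mt);
 *     if ( p2m_is_paging(p2mt) )
 *     {
 *         put_gfn(d, gfn_x(gfn));
 *         p2m_mem_paging_populate(d, gfn);
 *         return -ENOENT;    // retried once the page has been paged back in
 *     }
 */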

/*
 * p2m_mem_paging_resume - Resume guest gfn
 * @d: guest domain
 * @rsp: vm_event response received
 *
 * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw. It is
 * called by the pager.
 *
 * The gfn was previously either evicted and populated, or nominated and
 * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
 * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
 * the pager did not call prepare().
 *
 * If the gfn was dropped the vcpu needs to be unpaused.
 */
void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;

    /* Fix p2m entry if the page was not dropped */
    if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
    {
        gfn_t gfn = _gfn(rsp->u.mem_access.gfn);

        gfn_lock(p2m, gfn, 0);
        mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
        /*
         * Allow only pages which were prepared properly, or pages which
         * were nominated but not evicted.
         */
        if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
        {
            int rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                                   paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                            : p2m_ram_rw, a);

            if ( !rc )
                set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
        }
        gfn_unlock(p2m, gfn, 0);
    }
}
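
/*
 * Illustrative sketch (not part of this file): the response a pager might
 * put on the vm_event ring once it has handled a populate request (typically
 * after a successful XENMEM_paging_op_prep).  Echoing the request back keeps
 * the gfn, the paging flags and VM_EVENT_FLAG_VCPU_PAUSED intact, so that a
 * vcpu paused in p2m_mem_paging_populate() gets unpaused.
 *
 *     rsp.reason       = VM_EVENT_REASON_MEM_PAGING;
 *     rsp.vcpu_id      = req.vcpu_id;
 *     rsp.flags        = req.flags;
 *     rsp.u.mem_paging = req.u.mem_paging;
 */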

/*
 * nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success or negative errno values if gfn is not pageable.
 *
 * nominate() is called by the pager and checks if a guest page can be paged
 * out. If the following conditions are met the p2mt will be changed:
 * - the gfn is backed by a mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is read-only for the guest.  On success the
 * pager can write the page contents to disk and later evict the page.
 */
static int nominate(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn) )
        goto out;

    /* Check page count and type */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success or negative errno values if eviction is not possible.
 *
 * evict() is called by the pager and will free a guest page and release it
 * back to Xen. If the following conditions are met the page can be freed:
 * - the gfn is backed by a mfn
 * - the gfn was nominated
 * - the mfn has still exactly one user and has no special meaning
 *
 * After successful nomination some other process could have mapped the page. In
 * this case eviction cannot be done. If the gfn was populated before the pager
 * could evict it, eviction cannot be done either. In this case the gfn is
 * still backed by a mfn.
 */
static int evict(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /* Check page count and type once more */
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    put_page_alloc_ref(page);

    /* Remove mapping from p2m table */
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                        p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * prepare - Allocate a new page for the guest
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * prepare() will allocate a new page for the guest if the gfn is not backed
 * by a mfn. It is called by the pager.
 * It is required that the gfn was already populated. The gfn may already have a
 * mfn if populate was called for a gfn which was nominated but not evicted. In
 * this case only the p2mt needs to be forwarded.
 */
static int prepare(struct domain *d, gfn_t gfn,
                   XEN_GUEST_HANDLE_64(const_uint8) buffer)
{
    struct page_info *page = NULL;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret, page_extant = 1;

    if ( !guest_handle_okay(buffer, PAGE_SIZE) )
        return -EINVAL;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    ret = -ENOENT;
    /* Allow missing pages */
    if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
        goto out;

    /* Allocate a page if the gfn does not have one yet */
    if ( !mfn_valid(mfn) )
    {
        void *guest_map;

        /* If the user did not provide a buffer, we disallow */
        ret = -EINVAL;
        if ( unlikely(guest_handle_is_null(buffer)) )
            goto out;
        /* Get a free page */
        ret = -ENOMEM;
        page_alloc_mm_pre_lock(d);
        page = alloc_domheap_page(d, 0);
        if ( unlikely(page == NULL) )
            goto out;
        if ( unlikely(!get_page(page, d)) )
        {
            /*
             * The domain can't possibly know about this page yet, so failure
             * here is a clear indication of something fishy going on.
             */
            gprintk(XENLOG_ERR,
                    "%pd: fresh page for GFN %"PRI_gfn" in unexpected state\n",
                    d, gfn_x(gfn));
            domain_crash(d);
            page = NULL;
            goto out;
        }
        mfn = page_to_mfn(page);
        page_extant = 0;

        guest_map = map_domain_page(mfn);
        ret = copy_from_guest(guest_map, buffer, PAGE_SIZE);
        unmap_domain_page(guest_map);
        if ( ret )
        {
            ret = -EFAULT;
            goto out;
        }
    }

    /*
     * Make the page already guest-accessible. If the pager still has a
     * pending resume operation, it will be idempotent p2m entry-wise, but
     * will unpause the vcpu.
     */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                 : p2m_ram_rw, a);
    if ( !ret )
    {
        set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));

        if ( !page_extant )
            atomic_dec(&d->paged_pages);
    }

 out:
    gfn_unlock(p2m, gfn, 0);

    if ( page )
    {
        /*
         * Free the page on error.  Drop our temporary reference in all
         * cases.
         */
        if ( ret )
            put_page_alloc_ref(page);
        put_page(page);
    }

    return ret;
}

int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
{
    int rc;
    xen_mem_paging_op_t mpo;
    struct domain *d;
    bool_t copyback = 0;

    if ( copy_from_guest(&mpo, arg, 1) )
        return -EFAULT;

    rc = rcu_lock_live_remote_domain_by_id(mpo.domain, &d);
    if ( rc )
        return rc;

    rc = xsm_mem_paging(XSM_DM_PRIV, d);
    if ( rc )
        goto out;

    rc = -ENODEV;
    if ( unlikely(!vm_event_check_ring(d->vm_event_paging)) )
        goto out;

    switch ( mpo.op )
    {
    case XENMEM_paging_op_nominate:
        rc = nominate(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_evict:
        rc = evict(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_prep:
        rc = prepare(d, _gfn(mpo.gfn), mpo.buffer);
        if ( !rc )
            copyback = 1;
        break;

    default:
        rc = -ENOSYS;
        break;
    }

    if ( copyback && __copy_to_guest(arg, &mpo, 1) )
        rc = -EFAULT;

 out:
    rcu_unlock_domain(d);
    return rc;
}
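
/*
 * Illustrative sketch (not part of this file): the sequence of
 * XENMEM_paging_op requests a pager is expected to issue.  Error handling,
 * ring setup and the hypercall plumbing (e.g. going through libxenctrl from
 * a dom0 pager) are omitted; memory_op() stands in for issuing the
 * XENMEM_paging_op memory op, and pager_save()/pager_load() are hypothetical
 * helpers for the pager's backing store.
 *
 *     xen_mem_paging_op_t mpo = { .domain = domid, .gfn = gfn };
 *
 *     // Page-out: nominate the gfn, save its contents, then evict it.
 *     mpo.op = XENMEM_paging_op_nominate;
 *     memory_op(XENMEM_paging_op, &mpo);
 *     pager_save(domid, gfn);
 *     mpo.op = XENMEM_paging_op_evict;
 *     memory_op(XENMEM_paging_op, &mpo);
 *
 *     // Page-in: when a populate request arrives on the vm_event ring,
 *     // hand the saved contents back and respond to unpause the vcpu.
 *     mpo.op = XENMEM_paging_op_prep;
 *     set_xen_guest_handle(mpo.buffer, pager_load(domid, gfn));
 *     memory_op(XENMEM_paging_op, &mpo);
 */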


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */