/******************************************************************************
 * arch/x86/mm/mem_paging.c
 *
 * Memory paging support.
 *
 * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */


#include <asm/p2m.h>
#include <xen/guest_access.h>
#include <xen/vm_event.h>
#include <xsm/xsm.h>

#include "mm-locks.h"

/*
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
 * @d: guest domain
 * @gfn: guest page to drop
 *
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
 * released by the guest. The pager is supposed to drop its reference to the
 * gfn.
 */
void p2m_mem_paging_drop_page(struct domain *d, gfn_t gfn, p2m_type_t p2mt)
{
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };

    /*
     * We allow no ring in this unique case, because it won't affect
     * correctness of the guest execution at this point. If this is the only
     * page that happens to be paged-out, we'll be okay, but it's likely the
     * guest will crash shortly anyway.
     */
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    if ( rc < 0 )
        return;

    /* Send release notification to pager */
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;

    /* Only update stats if the page has already been evicted */
    if ( p2mt != p2m_ram_paging_out )
        atomic_dec(&d->paged_pages);
    else
        /* Evict will fail now, tag this request for pager */
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

    vm_event_put_request(d, d->vm_event_paging, &req);
}

/*
 * p2m_mem_paging_populate - Tell pager to populate a paged page
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * p2m_mem_paging_populate() will notify the pager that a page in any of the
 * paging states needs to be written back into the guest.
 * This function needs to be called whenever gfn_to_mfn() returns any of the
 * p2m paging types, because the gfn may not be backed by a mfn.
 *
 * The gfn can be in any of the paging states, but the pager need only be
 * notified when the gfn is in the paging-out path (paging_out or paged). This
 * function may be called more than once from several vcpus. If the vcpu
 * belongs to the guest, the vcpu must be stopped and the pager notified that
 * the vcpu was stopped. The pager needs to handle several requests for the
 * same gfn.
 *
 * If the gfn is not in the paging-out path and the vcpu does not belong to
 * the guest, nothing needs to be done and the function assumes that a request
 * was already sent to the pager. In this case the caller has to try again
 * until the gfn is fully paged in again.
 */
void p2m_mem_paging_populate(struct domain *d, gfn_t gfn)
{
    struct vcpu *v = current;
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    /* We're paging. There should be a ring. */
    if ( rc == -EOPNOTSUPP )
    {
        gdprintk(XENLOG_ERR, "%pd paging gfn %"PRI_gfn" yet no ring in place\n",
                 d, gfn_x(gfn));
        /* Prevent the vcpu from faulting repeatedly on the same gfn */
        if ( v->domain == d )
            vcpu_pause_nosync(v);
        domain_crash(d);
        return;
    }
    else if ( rc < 0 )
        return;

    /* Fix p2m mapping */
    gfn_lock(p2m, gfn, 0);
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    /* Allow only nominated or evicted pages to enter page-in path */
    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
    {
        /* Evict will fail now, tag this request for pager */
        if ( p2mt == p2m_ram_paging_out )
            req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

        rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
    }
    gfn_unlock(p2m, gfn, 0);
    if ( rc < 0 )
        goto out_cancel;

    /* Pause domain if request came from guest and gfn has paging type */
    if ( p2m_is_paging(p2mt) && v->domain == d )
    {
        vm_event_vcpu_pause(v);
        req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
    }
    /* No need to inform pager if the gfn is not in the page-out path */
    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
    {
        /* gfn is already on its way back and vcpu is not paused */
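    /* Also reached via the goto above when fixing the p2m entry failed. */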
    out_cancel:
        vm_event_cancel_slot(d, d->vm_event_paging);
        return;
    }

    /* Send request to pager */
    req.u.mem_paging.p2mt = p2mt;
    req.vcpu_id = v->vcpu_id;

    vm_event_put_request(d, d->vm_event_paging, &req);
}

/*
 * p2m_mem_paging_resume - Resume guest gfn
 * @d: guest domain
 * @rsp: vm_event response received
 *
 * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw. It is
 * called by the pager.
 *
 * The gfn was previously either evicted and populated, or nominated and
 * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
 * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
 * the pager did not call prepare().
 *
 * If the gfn was dropped the vcpu needs to be unpaused.
 */
void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;

    /* Fix p2m entry if the page was not dropped */
    if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
    {
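        /*
         * The gfn is read through the mem_access view of the response union;
         * the gfn field sits first in both the mem_access and mem_paging
         * views, so this yields the gfn of the paging response.
         */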
        gfn_t gfn = _gfn(rsp->u.mem_access.gfn);

        gfn_lock(p2m, gfn, 0);
        mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
        /*
         * Allow only pages which were prepared properly, or pages which
         * were nominated but not evicted.
         */
        if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
        {
            int rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                                   paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                            : p2m_ram_rw, a);

            if ( !rc )
                set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
        }
        gfn_unlock(p2m, gfn, 0);
    }
}

/*
 * nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success, or negative errno values if the gfn is not pageable.
 *
 * nominate() is called by the pager and checks if a guest page can be paged
 * out. If the following conditions are met the p2mt will be changed:
 * - the gfn is backed by a mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is read-only for the guest. On success
 * the pager can write the page contents to disk and later evict the page.
 */
static int nominate(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn) )
        goto out;

    /* Check page count and type */
    page = mfn_to_page(mfn);
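    /*
     * Only the allocation reference may be held (a count of exactly 1 with
     * PGC_allocated set) and the page must have no type uses, i.e. nothing
     * else maps or otherwise relies on this page.
     */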
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success, or negative errno values if eviction is not possible.
 *
 * evict() is called by the pager and will free a guest page and release it
 * back to Xen. If the following conditions are met the page can be freed:
 * - the gfn is backed by a mfn
 * - the gfn was nominated
 * - the mfn has still exactly one user and has no special meaning
 *
 * After successful nomination some other process could have mapped the page.
 * In this case eviction cannot be done. If the gfn was populated before the
 * pager could evict it, eviction cannot be done either. In this case the gfn
 * is still backed by a mfn.
 */
static int evict(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /* Check page count and type once more */
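    /*
     * Exactly two references are expected now: the allocation reference and
     * the one taken by get_page() above. Anything more means someone else
     * still uses the page and eviction must be abandoned.
     */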
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    put_page_alloc_ref(page);

    /* Remove mapping from p2m table */
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                        p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * prepare - Allocate a new page for the guest
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * prepare() will allocate a new page for the guest if the gfn is not backed
 * by a mfn. It is called by the pager.
 * It is required that the gfn was already populated. The gfn may already have
 * a mfn if populate was called for a gfn which was nominated but not evicted.
 * In this case only the p2mt needs to be forwarded.
 */
static int prepare(struct domain *d, gfn_t gfn,
                   XEN_GUEST_HANDLE_64(const_uint8) buffer)
{
    struct page_info *page = NULL;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret, page_extant = 1;

    if ( !guest_handle_okay(buffer, PAGE_SIZE) )
        return -EINVAL;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    ret = -ENOENT;
    /* Allow missing pages */
    if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
        goto out;

    /* Allocate a page if the gfn does not have one yet */
    if ( !mfn_valid(mfn) )
    {
        void *guest_map;

        /* If the user did not provide a buffer, we disallow */
        ret = -EINVAL;
        if ( unlikely(guest_handle_is_null(buffer)) )
            goto out;
        /* Get a free page */
        ret = -ENOMEM;
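        /*
         * Note the page allocator locking for the mm lock-order checks
         * before allocating, as the gfn (p2m) lock is still held here.
         */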
        page_alloc_mm_pre_lock(d);
        page = alloc_domheap_page(d, 0);
        if ( unlikely(page == NULL) )
            goto out;
        if ( unlikely(!get_page(page, d)) )
        {
            /*
             * The domain can't possibly know about this page yet, so failure
             * here is a clear indication of something fishy going on.
             */
            gprintk(XENLOG_ERR,
                    "%pd: fresh page for GFN %"PRI_gfn" in unexpected state\n",
                    d, gfn_x(gfn));
            domain_crash(d);
            page = NULL;
            goto out;
        }
        mfn = page_to_mfn(page);
        page_extant = 0;

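        /* Fill the new page with the contents supplied by the pager. */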
        guest_map = map_domain_page(mfn);
        ret = copy_from_guest(guest_map, buffer, PAGE_SIZE);
        unmap_domain_page(guest_map);
        if ( ret )
        {
            ret = -EFAULT;
            goto out;
        }
    }

    /*
     * Make the page already guest-accessible. If the pager still has a
     * pending resume operation, it will be idempotent p2m entry-wise, but
     * will unpause the vcpu.
     */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                 : p2m_ram_rw, a);
    if ( !ret )
    {
        set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));

        if ( !page_extant )
            atomic_dec(&d->paged_pages);
    }

 out:
    gfn_unlock(p2m, gfn, 0);

    if ( page )
    {
        /*
         * Free the page on error. Drop our temporary reference in all
         * cases.
         */
        if ( ret )
            put_page_alloc_ref(page);
        put_page(page);
    }

    return ret;
}

int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
{
    int rc;
    xen_mem_paging_op_t mpo;
    struct domain *d;
    bool_t copyback = 0;

    if ( copy_from_guest(&mpo, arg, 1) )
        return -EFAULT;

    rc = rcu_lock_live_remote_domain_by_id(mpo.domain, &d);
    if ( rc )
        return rc;

    rc = xsm_mem_paging(XSM_DM_PRIV, d);
    if ( rc )
        goto out;

    rc = -ENODEV;
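    /* Paging operations are only valid once a paging ring has been set up. */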
    if ( unlikely(!vm_event_check_ring(d->vm_event_paging)) )
        goto out;

    switch( mpo.op )
    {
    case XENMEM_paging_op_nominate:
        rc = nominate(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_evict:
        rc = evict(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_prep:
        rc = prepare(d, _gfn(mpo.gfn), mpo.buffer);
        if ( !rc )
            copyback = 1;
        break;

    default:
        rc = -ENOSYS;
        break;
    }

    if ( copyback && __copy_to_guest(arg, &mpo, 1) )
        rc = -EFAULT;

 out:
    rcu_unlock_domain(d);
    return rc;
}


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */