KVM
eventfd.c
Go to the documentation of this file.
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * kvm eventfd support - use eventfd objects to signal various KVM events
4  *
5  * Copyright 2009 Novell. All Rights Reserved.
6  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
7  *
8  * Author:
9  * Gregory Haskins <ghaskins@novell.com>
10  */
11 
12 #include <linux/kvm_host.h>
13 #include <linux/kvm.h>
14 #include <linux/kvm_irqfd.h>
15 #include <linux/workqueue.h>
16 #include <linux/syscalls.h>
17 #include <linux/wait.h>
18 #include <linux/poll.h>
19 #include <linux/file.h>
20 #include <linux/list.h>
21 #include <linux/eventfd.h>
22 #include <linux/kernel.h>
23 #include <linux/srcu.h>
24 #include <linux/slab.h>
25 #include <linux/seqlock.h>
26 #include <linux/irqbypass.h>
27 #include <trace/events/kvm.h>
28 
29 #include <kvm/iodev.h>
30 
31 #ifdef CONFIG_HAVE_KVM_IRQCHIP
32 
33 static struct workqueue_struct *irqfd_cleanup_wq;
34 
35 bool __attribute__((weak))
36 kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
37 {
38  return true;
39 }
40 
41 static void
42 irqfd_inject(struct work_struct *work)
43 {
44  struct kvm_kernel_irqfd *irqfd =
45  container_of(work, struct kvm_kernel_irqfd, inject);
46  struct kvm *kvm = irqfd->kvm;
47 
48  if (!irqfd->resampler) {
49  kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
50  false);
51  kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
52  false);
53  } else
54  kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
55  irqfd->gsi, 1, false);
56 }
57 
58 static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
59 {
60  struct kvm_kernel_irqfd *irqfd;
61 
62  list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
63  srcu_read_lock_held(&resampler->kvm->irq_srcu))
64  eventfd_signal(irqfd->resamplefd);
65 }
66 
67 /*
68  * Since resampler irqfds share an IRQ source ID, we de-assert once
69  * then notify all of the resampler irqfds using this GSI. We can't
70  * do multiple de-asserts or we risk racing with incoming re-asserts.
71  */
72 static void
73 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
74 {
75  struct kvm_kernel_irqfd_resampler *resampler;
76  struct kvm *kvm;
77  int idx;
78 
79  resampler = container_of(kian,
80  struct kvm_kernel_irqfd_resampler, notifier);
81  kvm = resampler->kvm;
82 
83  kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
84  resampler->notifier.gsi, 0, false);
85 
86  idx = srcu_read_lock(&kvm->irq_srcu);
87  irqfd_resampler_notify(resampler);
88  srcu_read_unlock(&kvm->irq_srcu, idx);
89 }
90 
91 static void
92 irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
93 {
94  struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
95  struct kvm *kvm = resampler->kvm;
96 
97  mutex_lock(&kvm->irqfds.resampler_lock);
98 
99  list_del_rcu(&irqfd->resampler_link);
100  synchronize_srcu(&kvm->irq_srcu);
101 
102  if (list_empty(&resampler->list)) {
103  list_del_rcu(&resampler->link);
104  kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
105  /*
106  * synchronize_srcu(&kvm->irq_srcu) already called
107  * in kvm_unregister_irq_ack_notifier().
108  */
109  kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
110  resampler->notifier.gsi, 0, false);
111  kfree(resampler);
112  }
113 
114  mutex_unlock(&kvm->irqfds.resampler_lock);
115 }
116 
117 /*
118  * Race-free decouple logic (ordering is critical)
119  */
120 static void
121 irqfd_shutdown(struct work_struct *work)
122 {
123  struct kvm_kernel_irqfd *irqfd =
124  container_of(work, struct kvm_kernel_irqfd, shutdown);
125  struct kvm *kvm = irqfd->kvm;
126  u64 cnt;
127 
128  /* Make sure irqfd has been initialized in assign path. */
129  synchronize_srcu(&kvm->irq_srcu);
130 
131  /*
132  * Synchronize with the wait-queue and unhook ourselves to prevent
133  * further events.
134  */
135  eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
136 
137  /*
138  * We know no new events will be scheduled at this point, so block
139  * until all previously outstanding events have completed
140  */
141  flush_work(&irqfd->inject);
142 
143  if (irqfd->resampler) {
144  irqfd_resampler_shutdown(irqfd);
145  eventfd_ctx_put(irqfd->resamplefd);
146  }
147 
148  /*
149  * It is now safe to release the object's resources
150  */
151 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
152  irq_bypass_unregister_consumer(&irqfd->consumer);
153 #endif
154  eventfd_ctx_put(irqfd->eventfd);
155  kfree(irqfd);
156 }
157 
158 
159 /* assumes kvm->irqfds.lock is held */
160 static bool
161 irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
162 {
163  return list_empty(&irqfd->list) ? false : true;
164 }
165 
166 /*
167  * Mark the irqfd as inactive and schedule it for removal
168  *
169  * assumes kvm->irqfds.lock is held
170  */
171 static void
172 irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
173 {
174  BUG_ON(!irqfd_is_active(irqfd));
175 
176  list_del_init(&irqfd->list);
177 
178  queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
179 }
180 
182  struct kvm_kernel_irq_routing_entry *irq,
183  struct kvm *kvm, int irq_source_id,
184  int level,
185  bool line_status)
186 {
187  return -EWOULDBLOCK;
188 }
189 
190 /*
191  * Called with wqh->lock held and interrupts disabled
192  */
193 static int
194 irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
195 {
196  struct kvm_kernel_irqfd *irqfd =
197  container_of(wait, struct kvm_kernel_irqfd, wait);
198  __poll_t flags = key_to_poll(key);
199  struct kvm_kernel_irq_routing_entry irq;
200  struct kvm *kvm = irqfd->kvm;
201  unsigned seq;
202  int idx;
203  int ret = 0;
204 
205  if (flags & EPOLLIN) {
206  u64 cnt;
207  eventfd_ctx_do_read(irqfd->eventfd, &cnt);
208 
209  idx = srcu_read_lock(&kvm->irq_srcu);
210  do {
211  seq = read_seqcount_begin(&irqfd->irq_entry_sc);
212  irq = irqfd->irq_entry;
213  } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
214  /* An event has been signaled, inject an interrupt */
215  if (kvm_arch_set_irq_inatomic(&irq, kvm,
216  KVM_USERSPACE_IRQ_SOURCE_ID, 1,
217  false) == -EWOULDBLOCK)
218  schedule_work(&irqfd->inject);
219  srcu_read_unlock(&kvm->irq_srcu, idx);
220  ret = 1;
221  }
222 
223  if (flags & EPOLLHUP) {
224  /* The eventfd is closing, detach from KVM */
225  unsigned long iflags;
226 
227  spin_lock_irqsave(&kvm->irqfds.lock, iflags);
228 
229  /*
230  * We must check if someone deactivated the irqfd before
231  * we could acquire the irqfds.lock since the item is
232  * deactivated from the KVM side before it is unhooked from
233  * the wait-queue. If it is already deactivated, we can
234  * simply return knowing the other side will cleanup for us.
235  * We cannot race against the irqfd going away since the
236  * other side is required to acquire wqh->lock, which we hold
237  */
238  if (irqfd_is_active(irqfd))
239  irqfd_deactivate(irqfd);
240 
241  spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
242  }
243 
244  return ret;
245 }
246 
247 static void
248 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
249  poll_table *pt)
250 {
251  struct kvm_kernel_irqfd *irqfd =
252  container_of(pt, struct kvm_kernel_irqfd, pt);
253  add_wait_queue_priority(wqh, &irqfd->wait);
254 }
255 
256 /* Must be called under irqfds.lock */
257 static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
258 {
259  struct kvm_kernel_irq_routing_entry *e;
260  struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
261  int n_entries;
262 
263  n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
264 
265  write_seqcount_begin(&irqfd->irq_entry_sc);
266 
267  e = entries;
268  if (n_entries == 1)
269  irqfd->irq_entry = *e;
270  else
271  irqfd->irq_entry.type = 0;
272 
273  write_seqcount_end(&irqfd->irq_entry_sc);
274 }
275 
276 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
/*
 * Default (weak) irq-bypass "stop" callback: intentionally does nothing.
 * Installed as irqfd->consumer.stop by kvm_irqfd_assign().
 */
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}
281 
/*
 * Default (weak) irq-bypass "start" callback: intentionally does nothing.
 * Installed as irqfd->consumer.start by kvm_irqfd_assign().
 */
void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}
286 
287 int __attribute__((weak)) kvm_arch_update_irqfd_routing(
288  struct kvm *kvm, unsigned int host_irq,
289  uint32_t guest_irq, bool set)
290 {
291  return 0;
292 }
293 
294 bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
295  struct kvm_kernel_irq_routing_entry *old,
296  struct kvm_kernel_irq_routing_entry *new)
297 {
298  return true;
299 }
300 #endif
301 
302 static int
303 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
304 {
305  struct kvm_kernel_irqfd *irqfd, *tmp;
306  struct fd f;
307  struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
308  int ret;
309  __poll_t events;
310  int idx;
311 
312  if (!kvm_arch_intc_initialized(kvm))
313  return -EAGAIN;
314 
315  if (!kvm_arch_irqfd_allowed(kvm, args))
316  return -EINVAL;
317 
318  irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
319  if (!irqfd)
320  return -ENOMEM;
321 
322  irqfd->kvm = kvm;
323  irqfd->gsi = args->gsi;
324  INIT_LIST_HEAD(&irqfd->list);
325  INIT_WORK(&irqfd->inject, irqfd_inject);
326  INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
327  seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);
328 
329  f = fdget(args->fd);
330  if (!f.file) {
331  ret = -EBADF;
332  goto out;
333  }
334 
335  eventfd = eventfd_ctx_fileget(f.file);
336  if (IS_ERR(eventfd)) {
337  ret = PTR_ERR(eventfd);
338  goto fail;
339  }
340 
341  irqfd->eventfd = eventfd;
342 
343  if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
344  struct kvm_kernel_irqfd_resampler *resampler;
345 
346  resamplefd = eventfd_ctx_fdget(args->resamplefd);
347  if (IS_ERR(resamplefd)) {
348  ret = PTR_ERR(resamplefd);
349  goto fail;
350  }
351 
352  irqfd->resamplefd = resamplefd;
353  INIT_LIST_HEAD(&irqfd->resampler_link);
354 
355  mutex_lock(&kvm->irqfds.resampler_lock);
356 
357  list_for_each_entry(resampler,
358  &kvm->irqfds.resampler_list, link) {
359  if (resampler->notifier.gsi == irqfd->gsi) {
360  irqfd->resampler = resampler;
361  break;
362  }
363  }
364 
365  if (!irqfd->resampler) {
366  resampler = kzalloc(sizeof(*resampler),
367  GFP_KERNEL_ACCOUNT);
368  if (!resampler) {
369  ret = -ENOMEM;
370  mutex_unlock(&kvm->irqfds.resampler_lock);
371  goto fail;
372  }
373 
374  resampler->kvm = kvm;
375  INIT_LIST_HEAD(&resampler->list);
376  resampler->notifier.gsi = irqfd->gsi;
377  resampler->notifier.irq_acked = irqfd_resampler_ack;
378  INIT_LIST_HEAD(&resampler->link);
379 
380  list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list);
381  kvm_register_irq_ack_notifier(kvm,
382  &resampler->notifier);
383  irqfd->resampler = resampler;
384  }
385 
386  list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
387  synchronize_srcu(&kvm->irq_srcu);
388 
389  mutex_unlock(&kvm->irqfds.resampler_lock);
390  }
391 
392  /*
393  * Install our own custom wake-up handling so we are notified via
394  * a callback whenever someone signals the underlying eventfd
395  */
396  init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
397  init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
398 
399  spin_lock_irq(&kvm->irqfds.lock);
400 
401  ret = 0;
402  list_for_each_entry(tmp, &kvm->irqfds.items, list) {
403  if (irqfd->eventfd != tmp->eventfd)
404  continue;
405  /* This fd is used for another irq already. */
406  ret = -EBUSY;
407  spin_unlock_irq(&kvm->irqfds.lock);
408  goto fail;
409  }
410 
411  idx = srcu_read_lock(&kvm->irq_srcu);
412  irqfd_update(kvm, irqfd);
413 
414  list_add_tail(&irqfd->list, &kvm->irqfds.items);
415 
416  spin_unlock_irq(&kvm->irqfds.lock);
417 
418  /*
419  * Check if there was an event already pending on the eventfd
420  * before we registered, and trigger it as if we didn't miss it.
421  */
422  events = vfs_poll(f.file, &irqfd->pt);
423 
424  if (events & EPOLLIN)
425  schedule_work(&irqfd->inject);
426 
427 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
428  if (kvm_arch_has_irq_bypass()) {
429  irqfd->consumer.token = (void *)irqfd->eventfd;
430  irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
431  irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
432  irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
433  irqfd->consumer.start = kvm_arch_irq_bypass_start;
434  ret = irq_bypass_register_consumer(&irqfd->consumer);
435  if (ret)
436  pr_info("irq bypass consumer (token %p) registration fails: %d\n",
437  irqfd->consumer.token, ret);
438  }
439 #endif
440 
441  srcu_read_unlock(&kvm->irq_srcu, idx);
442 
443  /*
444  * do not drop the file until the irqfd is fully initialized, otherwise
445  * we might race against the EPOLLHUP
446  */
447  fdput(f);
448  return 0;
449 
450 fail:
451  if (irqfd->resampler)
452  irqfd_resampler_shutdown(irqfd);
453 
454  if (resamplefd && !IS_ERR(resamplefd))
455  eventfd_ctx_put(resamplefd);
456 
457  if (eventfd && !IS_ERR(eventfd))
458  eventfd_ctx_put(eventfd);
459 
460  fdput(f);
461 
462 out:
463  kfree(irqfd);
464  return ret;
465 }
466 
467 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
468 {
469  struct kvm_irq_ack_notifier *kian;
470  int gsi, idx;
471 
472  idx = srcu_read_lock(&kvm->irq_srcu);
473  gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
474  if (gsi != -1)
475  hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
476  link, srcu_read_lock_held(&kvm->irq_srcu))
477  if (kian->gsi == gsi) {
478  srcu_read_unlock(&kvm->irq_srcu, idx);
479  return true;
480  }
481 
482  srcu_read_unlock(&kvm->irq_srcu, idx);
483 
484  return false;
485 }
486 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
487 
488 void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
489 {
490  struct kvm_irq_ack_notifier *kian;
491 
492  hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
493  link, srcu_read_lock_held(&kvm->irq_srcu))
494  if (kian->gsi == gsi)
495  kian->irq_acked(kian);
496 }
497 
498 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
499 {
500  int gsi, idx;
501 
502  trace_kvm_ack_irq(irqchip, pin);
503 
504  idx = srcu_read_lock(&kvm->irq_srcu);
505  gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
506  if (gsi != -1)
507  kvm_notify_acked_gsi(kvm, gsi);
508  srcu_read_unlock(&kvm->irq_srcu, idx);
509 }
510 
511 void kvm_register_irq_ack_notifier(struct kvm *kvm,
512  struct kvm_irq_ack_notifier *kian)
513 {
514  mutex_lock(&kvm->irq_lock);
515  hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
516  mutex_unlock(&kvm->irq_lock);
517  kvm_arch_post_irq_ack_notifier_list_update(kvm);
518 }
519 
520 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
521  struct kvm_irq_ack_notifier *kian)
522 {
523  mutex_lock(&kvm->irq_lock);
524  hlist_del_init_rcu(&kian->link);
525  mutex_unlock(&kvm->irq_lock);
526  synchronize_srcu(&kvm->irq_srcu);
527  kvm_arch_post_irq_ack_notifier_list_update(kvm);
528 }
529 
530 /*
531  * shutdown any irqfd's that match fd+gsi
532  */
533 static int
534 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
535 {
536  struct kvm_kernel_irqfd *irqfd, *tmp;
537  struct eventfd_ctx *eventfd;
538 
539  eventfd = eventfd_ctx_fdget(args->fd);
540  if (IS_ERR(eventfd))
541  return PTR_ERR(eventfd);
542 
543  spin_lock_irq(&kvm->irqfds.lock);
544 
545  list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
546  if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
547  /*
548  * This clearing of irq_entry.type is needed for when
549  * another thread calls kvm_irq_routing_update before
550  * we flush workqueue below (we synchronize with
551  * kvm_irq_routing_update using irqfds.lock).
552  */
553  write_seqcount_begin(&irqfd->irq_entry_sc);
554  irqfd->irq_entry.type = 0;
555  write_seqcount_end(&irqfd->irq_entry_sc);
556  irqfd_deactivate(irqfd);
557  }
558  }
559 
560  spin_unlock_irq(&kvm->irqfds.lock);
561  eventfd_ctx_put(eventfd);
562 
563  /*
564  * Block until we know all outstanding shutdown jobs have completed
565  * so that we guarantee there will not be any more interrupts on this
566  * gsi once this deassign function returns.
567  */
568  flush_workqueue(irqfd_cleanup_wq);
569 
570  return 0;
571 }
572 
573 int
574 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
575 {
576  if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
577  return -EINVAL;
578 
579  if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
580  return kvm_irqfd_deassign(kvm, args);
581 
582  return kvm_irqfd_assign(kvm, args);
583 }
584 
585 /*
586  * This function is called as the kvm VM fd is being released. Shutdown all
587  * irqfds that still remain open
588  */
589 void
590 kvm_irqfd_release(struct kvm *kvm)
591 {
592  struct kvm_kernel_irqfd *irqfd, *tmp;
593 
594  spin_lock_irq(&kvm->irqfds.lock);
595 
596  list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
597  irqfd_deactivate(irqfd);
598 
599  spin_unlock_irq(&kvm->irqfds.lock);
600 
601  /*
602  * Block until we know all outstanding shutdown jobs have completed
603  * since we do not take a kvm* reference.
604  */
605  flush_workqueue(irqfd_cleanup_wq);
606 
607 }
608 
609 /*
610  * Take note of a change in irq routing.
611  * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
612  */
613 void kvm_irq_routing_update(struct kvm *kvm)
614 {
615  struct kvm_kernel_irqfd *irqfd;
616 
617  spin_lock_irq(&kvm->irqfds.lock);
618 
619  list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
620 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
621  /* Under irqfds.lock, so can read irq_entry safely */
622  struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
623 #endif
624 
625  irqfd_update(kvm, irqfd);
626 
627 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
628  if (irqfd->producer &&
629  kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
630  int ret = kvm_arch_update_irqfd_routing(
631  irqfd->kvm, irqfd->producer->irq,
632  irqfd->gsi, 1);
633  WARN_ON(ret);
634  }
635 #endif
636  }
637 
638  spin_unlock_irq(&kvm->irqfds.lock);
639 }
640 
641 bool kvm_notify_irqfd_resampler(struct kvm *kvm,
642  unsigned int irqchip,
643  unsigned int pin)
644 {
645  struct kvm_kernel_irqfd_resampler *resampler;
646  int gsi, idx;
647 
648  idx = srcu_read_lock(&kvm->irq_srcu);
649  gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
650  if (gsi != -1) {
651  list_for_each_entry_srcu(resampler,
652  &kvm->irqfds.resampler_list, link,
653  srcu_read_lock_held(&kvm->irq_srcu)) {
654  if (resampler->notifier.gsi == gsi) {
655  irqfd_resampler_notify(resampler);
656  srcu_read_unlock(&kvm->irq_srcu, idx);
657  return true;
658  }
659  }
660  }
661  srcu_read_unlock(&kvm->irq_srcu, idx);
662 
663  return false;
664 }
665 
666 /*
667  * create a host-wide workqueue for issuing deferred shutdown requests
668  * aggregated from all vm* instances. We need our own isolated
669  * queue to ease flushing work items when a VM exits.
670  */
671 int kvm_irqfd_init(void)
672 {
673  irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
674  if (!irqfd_cleanup_wq)
675  return -ENOMEM;
676 
677  return 0;
678 }
679 
680 void kvm_irqfd_exit(void)
681 {
682  destroy_workqueue(irqfd_cleanup_wq);
683 }
684 #endif
685 
686 /*
687  * --------------------------------------------------------------------
688  * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
689  *
690  * userspace can register a PIO/MMIO address with an eventfd for receiving
691  * notification when the memory has been touched.
692  * --------------------------------------------------------------------
693  */
694 
695 struct _ioeventfd {
696  struct list_head list;
697  u64 addr;
698  int length;
699  struct eventfd_ctx *eventfd;
701  struct kvm_io_device dev;
703  bool wildcard;
704 };
705 
706 static inline struct _ioeventfd *
708 {
709  return container_of(dev, struct _ioeventfd, dev);
710 }
711 
712 static void
714 {
715  eventfd_ctx_put(p->eventfd);
716  list_del(&p->list);
717  kfree(p);
718 }
719 
720 static bool
721 ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
722 {
723  u64 _val;
724 
725  if (addr != p->addr)
726  /* address must be precise for a hit */
727  return false;
728 
729  if (!p->length)
730  /* length = 0 means only look at the address, so always a hit */
731  return true;
732 
733  if (len != p->length)
734  /* address-range must be precise for a hit */
735  return false;
736 
737  if (p->wildcard)
738  /* all else equal, wildcard is always a hit */
739  return true;
740 
741  /* otherwise, we have to actually compare the data */
742 
743  BUG_ON(!IS_ALIGNED((unsigned long)val, len));
744 
745  switch (len) {
746  case 1:
747  _val = *(u8 *)val;
748  break;
749  case 2:
750  _val = *(u16 *)val;
751  break;
752  case 4:
753  _val = *(u32 *)val;
754  break;
755  case 8:
756  _val = *(u64 *)val;
757  break;
758  default:
759  return false;
760  }
761 
762  return _val == p->datamatch;
763 }
764 
765 /* MMIO/PIO writes trigger an event if the addr/val match */
766 static int
767 ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
768  int len, const void *val)
769 {
770  struct _ioeventfd *p = to_ioeventfd(this);
771 
772  if (!ioeventfd_in_range(p, addr, len, val))
773  return -EOPNOTSUPP;
774 
775  eventfd_signal(p->eventfd);
776  return 0;
777 }
778 
/*
 * Device destructor, installed as ioeventfd_ops.destructor.
 *
 * This function is called as KVM is completely shutting down.  We do
 * not need to worry about locking; just release everything we have as
 * quickly as possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}
790 
791 static const struct kvm_io_device_ops ioeventfd_ops = {
793  .destructor = ioeventfd_destructor,
794 };
795 
796 /* assumes kvm->slots_lock held */
797 static bool
798 ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
799 {
800  struct _ioeventfd *_p;
801 
802  list_for_each_entry(_p, &kvm->ioeventfds, list)
803  if (_p->bus_idx == p->bus_idx &&
804  _p->addr == p->addr &&
805  (!_p->length || !p->length ||
806  (_p->length == p->length &&
807  (_p->wildcard || p->wildcard ||
808  _p->datamatch == p->datamatch))))
809  return true;
810 
811  return false;
812 }
813 
814 static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
815 {
816  if (flags & KVM_IOEVENTFD_FLAG_PIO)
817  return KVM_PIO_BUS;
818  if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
819  return KVM_VIRTIO_CCW_NOTIFY_BUS;
820  return KVM_MMIO_BUS;
821 }
822 
823 static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
824  enum kvm_bus bus_idx,
825  struct kvm_ioeventfd *args)
826 {
827 
828  struct eventfd_ctx *eventfd;
829  struct _ioeventfd *p;
830  int ret;
831 
832  eventfd = eventfd_ctx_fdget(args->fd);
833  if (IS_ERR(eventfd))
834  return PTR_ERR(eventfd);
835 
836  p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
837  if (!p) {
838  ret = -ENOMEM;
839  goto fail;
840  }
841 
842  INIT_LIST_HEAD(&p->list);
843  p->addr = args->addr;
844  p->bus_idx = bus_idx;
845  p->length = args->len;
846  p->eventfd = eventfd;
847 
848  /* The datamatch feature is optional, otherwise this is a wildcard */
849  if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
850  p->datamatch = args->datamatch;
851  else
852  p->wildcard = true;
853 
854  mutex_lock(&kvm->slots_lock);
855 
856  /* Verify that there isn't a match already */
857  if (ioeventfd_check_collision(kvm, p)) {
858  ret = -EEXIST;
859  goto unlock_fail;
860  }
861 
863 
864  ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
865  &p->dev);
866  if (ret < 0)
867  goto unlock_fail;
868 
869  kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
870  list_add_tail(&p->list, &kvm->ioeventfds);
871 
872  mutex_unlock(&kvm->slots_lock);
873 
874  return 0;
875 
876 unlock_fail:
877  mutex_unlock(&kvm->slots_lock);
878  kfree(p);
879 
880 fail:
881  eventfd_ctx_put(eventfd);
882 
883  return ret;
884 }
885 
886 static int
887 kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
888  struct kvm_ioeventfd *args)
889 {
890  struct _ioeventfd *p;
891  struct eventfd_ctx *eventfd;
892  struct kvm_io_bus *bus;
893  int ret = -ENOENT;
894  bool wildcard;
895 
896  eventfd = eventfd_ctx_fdget(args->fd);
897  if (IS_ERR(eventfd))
898  return PTR_ERR(eventfd);
899 
900  wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
901 
902  mutex_lock(&kvm->slots_lock);
903 
904  list_for_each_entry(p, &kvm->ioeventfds, list) {
905  if (p->bus_idx != bus_idx ||
906  p->eventfd != eventfd ||
907  p->addr != args->addr ||
908  p->length != args->len ||
909  p->wildcard != wildcard)
910  continue;
911 
912  if (!p->wildcard && p->datamatch != args->datamatch)
913  continue;
914 
915  kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
916  bus = kvm_get_bus(kvm, bus_idx);
917  if (bus)
918  bus->ioeventfd_count--;
919  ret = 0;
920  break;
921  }
922 
923  mutex_unlock(&kvm->slots_lock);
924 
925  eventfd_ctx_put(eventfd);
926 
927  return ret;
928 }
929 
930 static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
931 {
932  enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
933  int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
934 
935  if (!args->len && bus_idx == KVM_MMIO_BUS)
936  kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
937 
938  return ret;
939 }
940 
941 static int
942 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
943 {
944  enum kvm_bus bus_idx;
945  int ret;
946 
947  bus_idx = ioeventfd_bus_from_flags(args->flags);
948  /* must be natural-word sized, or 0 to ignore length */
949  switch (args->len) {
950  case 0:
951  case 1:
952  case 2:
953  case 4:
954  case 8:
955  break;
956  default:
957  return -EINVAL;
958  }
959 
960  /* check for range overflow */
961  if (args->addr + args->len < args->addr)
962  return -EINVAL;
963 
964  /* check for extra flags that we don't understand */
965  if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
966  return -EINVAL;
967 
968  /* ioeventfd with no length can't be combined with DATAMATCH */
969  if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
970  return -EINVAL;
971 
972  ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
973  if (ret)
974  goto fail;
975 
976  /* When length is ignored, MMIO is also put on a separate bus, for
977  * faster lookups.
978  */
979  if (!args->len && bus_idx == KVM_MMIO_BUS) {
980  ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
981  if (ret < 0)
982  goto fast_fail;
983  }
984 
985  return 0;
986 
987 fast_fail:
988  kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
989 fail:
990  return ret;
991 }
992 
993 int
994 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
995 {
996  if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
997  return kvm_deassign_ioeventfd(kvm, args);
998 
999  return kvm_assign_ioeventfd(kvm, args);
1000 }
1001 
1002 void
1003 kvm_eventfd_init(struct kvm *kvm)
1004 {
1005 #ifdef CONFIG_HAVE_KVM_IRQCHIP
1006  spin_lock_init(&kvm->irqfds.lock);
1007  INIT_LIST_HEAD(&kvm->irqfds.items);
1008  INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
1009  mutex_init(&kvm->irqfds.resampler_lock);
1010 #endif
1011  INIT_LIST_HEAD(&kvm->ioeventfds);
1012 }
void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
Definition: arm.c:2522
bool kvm_arch_has_irq_bypass(void)
Definition: arm.c:2490
bool kvm_arch_intc_initialized(struct kvm *kvm)
Definition: arm.c:714
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
Definition: arm.c:2514
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod)
Definition: arm.c:2495
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod)
Definition: arm.c:2504
static bool ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
Definition: eventfd.c:721
static struct _ioeventfd * to_ioeventfd(struct kvm_io_device *dev)
Definition: eventfd.c:707
static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
Definition: eventfd.c:814
static void ioeventfd_destructor(struct kvm_io_device *this)
Definition: eventfd.c:784
static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
Definition: eventfd.c:942
static int kvm_assign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_ioeventfd *args)
Definition: eventfd.c:823
static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
Definition: eventfd.c:930
void kvm_eventfd_init(struct kvm *kvm)
Definition: eventfd.c:1003
static void ioeventfd_release(struct _ioeventfd *p)
Definition: eventfd.c:713
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
Definition: eventfd.c:994
static bool ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
Definition: eventfd.c:798
static int ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, int len, const void *val)
Definition: eventfd.c:767
static int kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_ioeventfd *args)
Definition: eventfd.c:887
static const struct kvm_io_device_ops ioeventfd_ops
Definition: eventfd.c:791
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn)
static void kvm_iodevice_init(struct kvm_io_device *dev, const struct kvm_io_device_ops *ops)
Definition: iodev.h:36
void __attribute__((weak))
Definition: irqchip.c:159
int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi)
Definition: irqchip.c:21
int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
Definition: irqchip.c:40
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status)
Definition: irqchip.c:70
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev)
Definition: kvm_main.c:5897
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev)
Definition: kvm_main.c:5941
bool wildcard
Definition: eventfd.c:703
u64 addr
Definition: eventfd.c:697
int length
Definition: eventfd.c:698
u8 bus_idx
Definition: eventfd.c:702
u64 datamatch
Definition: eventfd.c:700
struct kvm_io_device dev
Definition: eventfd.c:701
struct eventfd_ctx * eventfd
Definition: eventfd.c:699
struct list_head list
Definition: eventfd.c:696
int(* write)(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, int len, const void *val)
Definition: iodev.h:23
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status)
Definition: vgic-irqfd.c:104