1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* IPVS: Power of Twos Choice Scheduling module
3 *
4 * Authors: Darby Payne <darby.payne@applovin.com>
5 */
6
7 #define KMSG_COMPONENT "IPVS"
8 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
9
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/random.h>
13
14 #include <net/ip_vs.h>
15
16 /* Power of Twos Choice scheduling, algorithm originally described by
17 * Michael Mitzenmacher.
18 *
19 * Randomly picks two destinations and picks the one with the least
20 * amount of connections
21 *
22 * The algorithm calculates a few variables
23 * - total_weight = sum of all weights
24 * - rweight1 = random number between [0,total_weight]
25 * - rweight2 = random number between [0,total_weight]
26 *
27 * For each destination
28 * decrement rweight1 and rweight2 by the destination weight
29 * pick choice1 when rweight1 is <= 0
30 * pick choice2 when rweight2 is <= 0
31 *
32 * Return choice2 if choice2 has less connections than choice 1 normalized
33 * by weight
34 *
35 * References
36 * ----------
37 *
38 * [Mitzenmacher 2016]
39 * The Power of Two Random Choices: A Survey of Techniques and Results
40 * Michael Mitzenmacher, Andrea W. Richa y, Ramesh Sitaraman
41 * http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/twosurvey.pdf
42 *
43 */
ip_vs_twos_schedule(struct ip_vs_service * svc,const struct sk_buff * skb,struct ip_vs_iphdr * iph)44 static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc,
45 const struct sk_buff *skb,
46 struct ip_vs_iphdr *iph)
47 {
48 struct ip_vs_dest *dest, *choice1 = NULL, *choice2 = NULL;
49 int rweight1, rweight2, weight1 = -1, weight2 = -1, overhead1 = 0;
50 int overhead2, total_weight = 0, weight;
51
52 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
53
54 /* Generate a random weight between [0,sum of all weights) */
55 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
56 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD)) {
57 weight = atomic_read(&dest->weight);
58 if (weight > 0) {
59 total_weight += weight;
60 choice1 = dest;
61 }
62 }
63 }
64
65 if (!choice1) {
66 ip_vs_scheduler_err(svc, "no destination available");
67 return NULL;
68 }
69
70 /* Add 1 to total_weight so that the random weights are inclusive
71 * from 0 to total_weight
72 */
73 total_weight += 1;
74 rweight1 = prandom_u32() % total_weight;
75 rweight2 = prandom_u32() % total_weight;
76
77 /* Pick two weighted servers */
78 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
79 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
80 continue;
81
82 weight = atomic_read(&dest->weight);
83 if (weight <= 0)
84 continue;
85
86 rweight1 -= weight;
87 rweight2 -= weight;
88
89 if (rweight1 <= 0 && weight1 == -1) {
90 choice1 = dest;
91 weight1 = weight;
92 overhead1 = ip_vs_dest_conn_overhead(dest);
93 }
94
95 if (rweight2 <= 0 && weight2 == -1) {
96 choice2 = dest;
97 weight2 = weight;
98 overhead2 = ip_vs_dest_conn_overhead(dest);
99 }
100
101 if (weight1 != -1 && weight2 != -1)
102 goto nextstage;
103 }
104
105 nextstage:
106 if (choice2 && (weight2 * overhead1) > (weight1 * overhead2))
107 choice1 = choice2;
108
109 IP_VS_DBG_BUF(6, "twos: server %s:%u conns %d refcnt %d weight %d\n",
110 IP_VS_DBG_ADDR(choice1->af, &choice1->addr),
111 ntohs(choice1->port), atomic_read(&choice1->activeconns),
112 refcount_read(&choice1->refcnt),
113 atomic_read(&choice1->weight));
114
115 return choice1;
116 }
117
118 static struct ip_vs_scheduler ip_vs_twos_scheduler = {
119 .name = "twos",
120 .refcnt = ATOMIC_INIT(0),
121 .module = THIS_MODULE,
122 .n_list = LIST_HEAD_INIT(ip_vs_twos_scheduler.n_list),
123 .schedule = ip_vs_twos_schedule,
124 };
125
ip_vs_twos_init(void)126 static int __init ip_vs_twos_init(void)
127 {
128 return register_ip_vs_scheduler(&ip_vs_twos_scheduler);
129 }
130
ip_vs_twos_cleanup(void)131 static void __exit ip_vs_twos_cleanup(void)
132 {
133 unregister_ip_vs_scheduler(&ip_vs_twos_scheduler);
134 synchronize_rcu();
135 }
136
137 module_init(ip_vs_twos_init);
138 module_exit(ip_vs_twos_cleanup);
139 MODULE_LICENSE("GPL");
140