/*
 * x86 "alternatives": boot-time binary patching of instruction sequences
 * based on detected CPU features.
 */
#ifndef __X86_ALTERNATIVE_H__
#define __X86_ALTERNATIVE_H__

#ifdef __ASSEMBLY__
#include <asm/alternative-asm.h>
#else
#include <xen/lib.h>
#include <xen/stringify.h>
#include <asm/asm-macros.h>

/*
 * One patch-site descriptor, emitted into .altinstructions by the
 * ALTERNATIVE*() macros below (field order/widths must match
 * ALTINSTR_ENTRY()).  Both offsets are self-relative: each is added to the
 * address of the field itself to recover the absolute address (see
 * __ALT_PTR()), keeping the section position-independent.
 */
struct __packed alt_instr {
    int32_t orig_offset;    /* original instruction */
    int32_t repl_offset;    /* offset to replacement instruction */
    uint16_t cpuid;         /* cpuid bit set for replacement */
    uint8_t orig_len;       /* length of original instruction */
    uint8_t repl_len;       /* length of new instruction */
    uint8_t pad_len;        /* length of build-time padding */
    uint8_t priv;           /* Private, for use by apply_alternatives() */
};

/* Convert a self-relative offset field back into an absolute pointer. */
#define __ALT_PTR(a,f) ((uint8_t *)((void *)&(a)->f + (a)->f))
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, repl_offset)

extern void add_nops(void *insns, unsigned int len);
/* Similar to alternative_instructions except it can be run with IRQs enabled. */
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void alternative_instructions(void);
extern void alternative_branches(void);

/*
 * Assembler labels/length expressions for one patch site, as C string
 * literals to be pasted into asm() templates.  %= expands to a number
 * unique to each asm() instance, so the .LXEN local labels cannot collide
 * when the macros are used more than once in a translation unit.
 */
#define alt_orig_len "(.LXEN%=_orig_e - .LXEN%=_orig_s)"
#define alt_pad_len "(.LXEN%=_orig_p - .LXEN%=_orig_e)"
#define alt_total_len "(.LXEN%=_orig_p - .LXEN%=_orig_s)"
#define alt_repl_s(num) ".LXEN%=_repl_s"#num
#define alt_repl_e(num) ".LXEN%=_repl_e"#num
#define alt_repl_len(num) "(" alt_repl_e(num) " - " alt_repl_s(num) ")"

/* GAS's idea of true is -1, while Clang's idea is 1.
 */
#ifdef HAVE_AS_NEGATIVE_TRUE
# define AS_TRUE "-"
#else
# define AS_TRUE ""
#endif

/*
 * Branchless max() as an assembler expression: (a ^ ((a ^ b) & -(a < b))).
 * AS_TRUE normalises the assembler's boolean so that -(a < b) is an
 * all-ones mask exactly when a < b.
 */
#define as_max(a, b) "(("a") ^ ((("a") ^ ("b")) & -("AS_TRUE"(("a") < ("b")))))"

/*
 * Emit the original instruction(s) bracketed by the _orig_s/_orig_e labels,
 * then pad with nops (the mknops assembler macro) up to _orig_p whenever
 * "padding" (the signed replacement-minus-original length difference, as an
 * assembler expression) is positive; no padding is emitted otherwise.
 */
#define OLDINSTR(oldinstr, padding) \
    ".LXEN%=_orig_s:\n\t" oldinstr "\n .LXEN%=_orig_e:\n\t" \
    ".LXEN%=_diff = " padding "\n\t" \
    "mknops ("AS_TRUE"(.LXEN%=_diff > 0) * .LXEN%=_diff)\n\t" \
    ".LXEN%=_orig_p:\n\t"

/* Original instruction(s), padded to the length of replacement n1. */
#define OLDINSTR_1(oldinstr, n1) \
    OLDINSTR(oldinstr, alt_repl_len(n1) "-" alt_orig_len)

/* Original instruction(s), padded to the length of the longer replacement. */
#define OLDINSTR_2(oldinstr, n1, n2) \
    OLDINSTR(oldinstr, \
             as_max(alt_repl_len(n1), \
                    alt_repl_len(n2)) "-" alt_orig_len)

/*
 * One .altinstructions entry; layout must match struct alt_instr above.
 * The two .long offsets are made self-relative by subtracting '.' (the
 * current assembly position).
 */
#define ALTINSTR_ENTRY(feature, num) \
    " .long .LXEN%=_orig_s - .\n" /* label */ \
    " .long " alt_repl_s(num)" - .\n" /* new instruction */ \
    " .word " __stringify(feature) "\n" /* feature bit */ \
    " .byte " alt_orig_len "\n" /* source len */ \
    " .byte " alt_repl_len(num) "\n" /* replacement len */ \
    " .byte " alt_pad_len "\n" /* padding len */ \
    " .byte 0\n" /* priv */

/*
 * Build-time assertion emitted into the throwaway .discard section: if the
 * replacement were longer than the padded original, the .byte operand would
 * exceed 0xff and the assembler would reject it.
 */
#define DISCARD_ENTRY(num) /* repl <= total */ \
    " .byte 0xff + (" alt_repl_len(num) ") - (" alt_total_len ")\n"

/* Replacement instruction(s), bracketed by the _repl_s<num>/_repl_e<num> labels. */
#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
    alt_repl_s(num)":\n\t" newinstr "\n" alt_repl_e(num) ":\n\t"

/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
    OLDINSTR_1(oldinstr, 1) \
    ".pushsection .altinstructions, \"a\", @progbits\n" \
    ALTINSTR_ENTRY(feature, 1) \
    ".section .discard, \"a\", @progbits\n" \
    ".byte " alt_total_len "\n" /* total_len <= 255 */ \
    DISCARD_ENTRY(1) \
    ".section .altinstr_replacement, \"ax\", @progbits\n" \
    ALTINSTR_REPLACEMENT(newinstr, 1) \
    ".popsection\n"

/* Two-feature variant: emits two entries and two replacements for one site. */
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
    OLDINSTR_2(oldinstr, 1, 2) \
    ".pushsection .altinstructions, \"a\", @progbits\n" \
    ALTINSTR_ENTRY(feature1, 1) \
    ALTINSTR_ENTRY(feature2, 2) \
    ".section .discard, \"a\", @progbits\n" \
    ".byte " alt_total_len "\n" /* total_len <= 255 */ \
    DISCARD_ENTRY(1) \
    DISCARD_ENTRY(2) \
    ".section .altinstr_replacement, \"ax\", @progbits\n" \
    ALTINSTR_REPLACEMENT(newinstr1, 1) \
    ALTINSTR_REPLACEMENT(newinstr2, 2) \
    ".popsection\n"

/*
 * Alternative instructions for different CPU types or capabilities.
 *
 * This allows to use optimized instructions even on generic binary
 * kernels.
 *
 * length of oldinstr must be longer or equal the length of newinstr
 * It can be padded with nops as needed.
 *
 * For non barrier like inlines please define new variants
 * without volatile and memory clobber.
 */
#define alternative(oldinstr, newinstr, feature) \
    asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")

#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
    asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
                                newinstr2, feature2) \
                  : : : "memory")

/*
 * Alternative inline assembly with input.
 *
 * Peculiarities:
 * No memory clobber here.
 * Argument numbers start with 1.
 * Best is to use constraints that are fixed size (like (%1) ... "r")
 * If you use variable sized constraints like "m" or "g" in the
 * replacement make sure to pad to the worst case length.
 */
#define alternative_input(oldinstr, newinstr, feature, input...) \
    asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
                  : : input)

/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
    asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
                  : output : input)

/*
 * This is similar to alternative_io. But it has two features and
 * respective instructions.
 *
 * If CPU has feature2, newinstr2 is used.
 * Otherwise, if CPU has feature1, newinstr1 is used.
 * Otherwise, oldinstr is used.
 */
#define alternative_io_2(oldinstr, newinstr1, feature1, newinstr2, \
                         feature2, output, input...) \
    asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
                               newinstr2, feature2) \
                 : output : input)

/* Use this macro(s) if you need more than one output parameter. */
#define ASM_OUTPUT2(a...) a

/*
 * Machinery to allow converting indirect to direct calls, when the called
 * function is determined once at boot and later never changed.
 */

/* Integer argument registers, in SysV AMD64 calling-convention order. */
#define ALT_CALL_arg1 "rdi"
#define ALT_CALL_arg2 "rsi"
#define ALT_CALL_arg3 "rdx"
#define ALT_CALL_arg4 "rcx"
#define ALT_CALL_arg5 "r8"
#define ALT_CALL_arg6 "r9"

/*
 * Bind <arg> to the n-th argument register as local variable a<n>_.  The
 * "(arg) ? (arg) : 0" conditional is only valid for scalar (integer or
 * pointer) operands, so non-register-sized arguments fail to compile.
 */
#define ALT_CALL_ARG(arg, n) \
    register typeof((arg) ? (arg) : 0) a ## n ## _ \
        asm ( ALT_CALL_arg ## n ) = (arg)
/* As above, but declare the register without assigning an argument to it. */
#define ALT_CALL_NO_ARG(n) \
    register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n )

/*
 * Declare argument registers <n>..6, so that the asm() in
 * alternative_callN() can always name all six in its constraint lists.
 */
#define ALT_CALL_NO_ARG6 ALT_CALL_NO_ARG(6)
#define ALT_CALL_NO_ARG5 ALT_CALL_NO_ARG(5); ALT_CALL_NO_ARG6
#define ALT_CALL_NO_ARG4 ALT_CALL_NO_ARG(4); ALT_CALL_NO_ARG5
#define ALT_CALL_NO_ARG3 ALT_CALL_NO_ARG(3); ALT_CALL_NO_ARG4
#define ALT_CALL_NO_ARG2 ALT_CALL_NO_ARG(2); ALT_CALL_NO_ARG3
#define ALT_CALL_NO_ARG1 ALT_CALL_NO_ARG(1); ALT_CALL_NO_ARG2

/*
 * Unfortunately ALT_CALL_NO_ARG() above can't use a fake initializer (to
 * suppress "uninitialized variable" warnings), as various versions of gcc
 * older than 8.1 fall on the nose in various ways with that (always because
 * of some other construct elsewhere in the same function needing to use the
 * same hard register).  Otherwise the asm() below could uniformly use "+r"
 * output constraints, making unnecessary all these ALT_CALL<n>_OUT macros.
190 */ 191 #define ALT_CALL0_OUT "=r" (a1_), "=r" (a2_), "=r" (a3_), \ 192 "=r" (a4_), "=r" (a5_), "=r" (a6_) 193 #define ALT_CALL1_OUT "+r" (a1_), "=r" (a2_), "=r" (a3_), \ 194 "=r" (a4_), "=r" (a5_), "=r" (a6_) 195 #define ALT_CALL2_OUT "+r" (a1_), "+r" (a2_), "=r" (a3_), \ 196 "=r" (a4_), "=r" (a5_), "=r" (a6_) 197 #define ALT_CALL3_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \ 198 "=r" (a4_), "=r" (a5_), "=r" (a6_) 199 #define ALT_CALL4_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \ 200 "+r" (a4_), "=r" (a5_), "=r" (a6_) 201 #define ALT_CALL5_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \ 202 "+r" (a4_), "+r" (a5_), "=r" (a6_) 203 #define ALT_CALL6_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \ 204 "+r" (a4_), "+r" (a5_), "+r" (a6_) 205 206 #define alternative_callN(n, rettype, func) ({ \ 207 rettype ret_; \ 208 register unsigned long r10_ asm("r10"); \ 209 register unsigned long r11_ asm("r11"); \ 210 asm volatile (ALTERNATIVE("call *%c[addr](%%rip)", "call .", \ 211 X86_FEATURE_ALWAYS) \ 212 : ALT_CALL ## n ## _OUT, "=a" (ret_), \ 213 "=r" (r10_), "=r" (r11_) ASM_CALL_CONSTRAINT \ 214 : [addr] "i" (&(func)), "g" (func) \ 215 : "memory" ); \ 216 ret_; \ 217 }) 218 219 #define alternative_vcall0(func) ({ \ 220 ALT_CALL_NO_ARG1; \ 221 ((void)alternative_callN(0, int, func)); \ 222 }) 223 224 #define alternative_call0(func) ({ \ 225 ALT_CALL_NO_ARG1; \ 226 alternative_callN(0, typeof(func()), func); \ 227 }) 228 229 #define alternative_vcall1(func, arg) ({ \ 230 ALT_CALL_ARG(arg, 1); \ 231 ALT_CALL_NO_ARG2; \ 232 (void)sizeof(func(arg)); \ 233 (void)alternative_callN(1, int, func); \ 234 }) 235 236 #define alternative_call1(func, arg) ({ \ 237 ALT_CALL_ARG(arg, 1); \ 238 ALT_CALL_NO_ARG2; \ 239 alternative_callN(1, typeof(func(arg)), func); \ 240 }) 241 242 #define alternative_vcall2(func, arg1, arg2) ({ \ 243 typeof(arg2) v2_ = (arg2); \ 244 ALT_CALL_ARG(arg1, 1); \ 245 ALT_CALL_ARG(v2_, 2); \ 246 ALT_CALL_NO_ARG3; \ 247 (void)sizeof(func(arg1, arg2)); \ 248 
(void)alternative_callN(2, int, func); \ 249 }) 250 251 #define alternative_call2(func, arg1, arg2) ({ \ 252 typeof(arg2) v2_ = (arg2); \ 253 ALT_CALL_ARG(arg1, 1); \ 254 ALT_CALL_ARG(v2_, 2); \ 255 ALT_CALL_NO_ARG3; \ 256 alternative_callN(2, typeof(func(arg1, arg2)), func); \ 257 }) 258 259 #define alternative_vcall3(func, arg1, arg2, arg3) ({ \ 260 typeof(arg2) v2_ = (arg2); \ 261 typeof(arg3) v3_ = (arg3); \ 262 ALT_CALL_ARG(arg1, 1); \ 263 ALT_CALL_ARG(v2_, 2); \ 264 ALT_CALL_ARG(v3_, 3); \ 265 ALT_CALL_NO_ARG4; \ 266 (void)sizeof(func(arg1, arg2, arg3)); \ 267 (void)alternative_callN(3, int, func); \ 268 }) 269 270 #define alternative_call3(func, arg1, arg2, arg3) ({ \ 271 typeof(arg2) v2_ = (arg2); \ 272 typeof(arg3) v3_ = (arg3); \ 273 ALT_CALL_ARG(arg1, 1); \ 274 ALT_CALL_ARG(v2_, 2); \ 275 ALT_CALL_ARG(v3_, 3); \ 276 ALT_CALL_NO_ARG4; \ 277 alternative_callN(3, typeof(func(arg1, arg2, arg3)), \ 278 func); \ 279 }) 280 281 #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \ 282 typeof(arg2) v2_ = (arg2); \ 283 typeof(arg3) v3_ = (arg3); \ 284 typeof(arg4) v4_ = (arg4); \ 285 ALT_CALL_ARG(arg1, 1); \ 286 ALT_CALL_ARG(v2_, 2); \ 287 ALT_CALL_ARG(v3_, 3); \ 288 ALT_CALL_ARG(v4_, 4); \ 289 ALT_CALL_NO_ARG5; \ 290 (void)sizeof(func(arg1, arg2, arg3, arg4)); \ 291 (void)alternative_callN(4, int, func); \ 292 }) 293 294 #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \ 295 typeof(arg2) v2_ = (arg2); \ 296 typeof(arg3) v3_ = (arg3); \ 297 typeof(arg4) v4_ = (arg4); \ 298 ALT_CALL_ARG(arg1, 1); \ 299 ALT_CALL_ARG(v2_, 2); \ 300 ALT_CALL_ARG(v3_, 3); \ 301 ALT_CALL_ARG(v4_, 4); \ 302 ALT_CALL_NO_ARG5; \ 303 alternative_callN(4, typeof(func(arg1, arg2, \ 304 arg3, arg4)), \ 305 func); \ 306 }) 307 308 #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \ 309 typeof(arg2) v2_ = (arg2); \ 310 typeof(arg3) v3_ = (arg3); \ 311 typeof(arg4) v4_ = (arg4); \ 312 typeof(arg5) v5_ = (arg5); \ 313 ALT_CALL_ARG(arg1, 1); \ 314 
ALT_CALL_ARG(v2_, 2); \ 315 ALT_CALL_ARG(v3_, 3); \ 316 ALT_CALL_ARG(v4_, 4); \ 317 ALT_CALL_ARG(v5_, 5); \ 318 ALT_CALL_NO_ARG6; \ 319 (void)sizeof(func(arg1, arg2, arg3, arg4, arg5)); \ 320 (void)alternative_callN(5, int, func); \ 321 }) 322 323 #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \ 324 typeof(arg2) v2_ = (arg2); \ 325 typeof(arg3) v3_ = (arg3); \ 326 typeof(arg4) v4_ = (arg4); \ 327 typeof(arg5) v5_ = (arg5); \ 328 ALT_CALL_ARG(arg1, 1); \ 329 ALT_CALL_ARG(v2_, 2); \ 330 ALT_CALL_ARG(v3_, 3); \ 331 ALT_CALL_ARG(v4_, 4); \ 332 ALT_CALL_ARG(v5_, 5); \ 333 ALT_CALL_NO_ARG6; \ 334 alternative_callN(5, typeof(func(arg1, arg2, arg3, \ 335 arg4, arg5)), \ 336 func); \ 337 }) 338 339 #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ 340 typeof(arg2) v2_ = (arg2); \ 341 typeof(arg3) v3_ = (arg3); \ 342 typeof(arg4) v4_ = (arg4); \ 343 typeof(arg5) v5_ = (arg5); \ 344 typeof(arg6) v6_ = (arg6); \ 345 ALT_CALL_ARG(arg1, 1); \ 346 ALT_CALL_ARG(v2_, 2); \ 347 ALT_CALL_ARG(v3_, 3); \ 348 ALT_CALL_ARG(v4_, 4); \ 349 ALT_CALL_ARG(v5_, 5); \ 350 ALT_CALL_ARG(v6_, 6); \ 351 (void)sizeof(func(arg1, arg2, arg3, arg4, arg5, arg6)); \ 352 (void)alternative_callN(6, int, func); \ 353 }) 354 355 #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ 356 typeof(arg2) v2_ = (arg2); \ 357 typeof(arg3) v3_ = (arg3); \ 358 typeof(arg4) v4_ = (arg4); \ 359 typeof(arg5) v5_ = (arg5); \ 360 typeof(arg6) v6_ = (arg6); \ 361 ALT_CALL_ARG(arg1, 1); \ 362 ALT_CALL_ARG(v2_, 2); \ 363 ALT_CALL_ARG(v3_, 3); \ 364 ALT_CALL_ARG(v4_, 4); \ 365 ALT_CALL_ARG(v5_, 5); \ 366 ALT_CALL_ARG(v6_, 6); \ 367 alternative_callN(6, typeof(func(arg1, arg2, arg3, \ 368 arg4, arg5, arg6)), \ 369 func); \ 370 }) 371 372 #define alternative_vcall__(nr) alternative_vcall ## nr 373 #define alternative_call__(nr) alternative_call ## nr 374 375 #define alternative_vcall_(nr) alternative_vcall__(nr) 376 #define alternative_call_(nr) alternative_call__(nr) 

/*
 * Front ends: call func(args...) through a patchable call site, selecting
 * the alternative_[v]call<N> variant matching the argument count (up to 6).
 * The vcall form discards the result; the call form yields it.
 */
#define alternative_vcall(func, args...) \
    alternative_vcall_(count_args(args))(func, ## args)

#define alternative_call(func, args...) \
    alternative_call_(count_args(args))(func, ## args)

#endif /* !__ASSEMBLY__ */

#endif /* __X86_ALTERNATIVE_H__ */