/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>

/*
 * The unaligned-source loops below build every destination word out of two
 * consecutive aligned source words.  These two macros shift a word in place
 * by \y bits; the shift direction is swapped between little endian
 * (__cskyLE__ defined) and big endian builds.
 *
 * GET_FRONT_BITS is applied to the previously loaded word,
 * GET_AFTER_BITS to the newly loaded one; the results are OR-ed together.
 */
.macro	GET_FRONT_BITS rx y
#ifdef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm

.macro	GET_AFTER_BITS rx y
#ifdef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm

/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register use inside this routine:
 *   r2       dest as passed in; only read here, never written
 *   r3       src cursor
 *   r4       number of bytes still to copy
 *   r7       dest cursor (starts as a copy of r2)
 *   r1,r5,r6 scratch
 *   r8-r11   extra scratch for the 16-byte loops; saved on the stack
 *            before use and restored afterwards
 */
ENTRY(memcpy)
	mov	r7, r2
	cmplti	r4, 4
	bt	.L_copy_by_byte
	mov	r6, r2
	andi	r6, 3			/* r6 = dest & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3			/* r6 = src & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned

/* Both cursors are word aligned from here on. */
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)
.L_aligned_and_len_larger_16bytes:	/* copy 16 bytes per iteration */
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:					/* copy one word per iteration */
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

/*
 * Byte-at-a-time copy of the r4 bytes at r3 to r7.  Used for the tail
 * (fewer than 4 bytes left) and as the fallback when the dest/src distance
 * checks below reject the word-based paths.  r3 must be the exact, byte
 * accurate source address when this label is reached.
 */
.L_copy_by_byte:
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 */
.L_dest_not_aligned:
	/* Distance checks between dest and src; use the byte loop if they fail. */
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6			/* r5 counts from (dest & 3) up to 4 */
.L5:
	ldb	r1, (r3, 0)		/* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

/*
 * Judge the number of misaligned, 1, 2, 3?
 *
 * On entry r6 = src & 3 (non-zero).  After the distance check r3 is rounded
 * down to a word boundary, the first word is loaded into r1 and r3 is
 * advanced by 4, so from that point r3 = src + (4 - r6).  Every exit to
 * .L_copy_by_byte taken after that must subtract (4 - r6) from r3 again.
 */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	bclri	r3, 0
	bclri	r3, 1
	ldw	r1, (r3, 0)
	addi	r3, 4
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	/*
	 * r3 is already src + 3 here, so it has to be rewound before the
	 * byte loop runs; branching straight to .L_copy_by_byte would copy
	 * from the wrong source address.
	 */
	bf	.L_src_1byte_fixup
	cmplti	r4, 16
	bf	.L11
.L10:					/* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the raw word for the next round */
	GET_AFTER_BITS r6 24
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_src_1byte_fixup:
	subi	r3, 3			/* back to the byte-accurate src */
	br	.L_copy_by_byte
.L11:					/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8		/* little or big endian? */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9			/* r1 = last raw word, carried over */
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3
	br	.L_copy_by_byte

.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:					/* fewer than 16 bytes left */
	GET_FRONT_BITS r1 16
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the raw word for the next round */
	GET_AFTER_BITS r6 16
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the byte-accurate src */
	br	.L_copy_by_byte

.L21:					/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9			/* r1 = last raw word, carried over */
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2
	br	.L_copy_by_byte


.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:					/* fewer than 16 bytes left */
	GET_FRONT_BITS r1 24
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the raw word for the next round */
	GET_AFTER_BITS r6 8
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the byte-accurate src */
	br	.L_copy_by_byte
.L31:					/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9			/* r1 = last raw word, carried over */
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1
	br	.L_copy_by_byte