! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
!
! Copyright (C) 1995-2021 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS
!   RES_PTR (%o0) -- destination limb vector
!   S1_PTR  (%o1) -- first source limb vector
!   S2_PTR  (%o2) -- second source limb vector
!   SIZE    (%o3) -- number of 32-bit limbs; the file header requires > 0
!
! RETURN VALUE
!   %o0 -- carry out of the most significant limb (0 or 1)
!
! Strategy: use doubleword memory accesses (ldd/std) where pointer
! alignment permits.  The code dispatches on the mutual 4-byte alignment
! of the three pointers (xor/andcc #4): V1a when S2/RES agree, V1b when
! S1/RES agree (pointers swapped, addition is commutative), V2 when S1/S2
! agree but RES differs.  A single leading limb is peeled off where
! needed to reach doubleword alignment for the ldd/std operands.
!
! Carry handling: the loops interleave flag-setting bookkeeping (addcc,
! subcc) with the addx/addxcc carry chain, so the carry must be parked
! and revived around the loop control:
!       addx  %g0,%g0,%o4       ! %o4 = 0 + 0 + C   (park carry in %o4)
!       subcc %g0,%o4,%g0       ! 0 - %o4 borrows, i.e. sets C, iff %o4 != 0
! Each "restore cy" instruction sits in a branch delay slot and therefore
! executes before the instruction at the branch target.

#define RES_PTR %o0
#define S1_PTR  %o1
#define S2_PTR  %o2
#define SIZE    %o3

#include <sysdep.h>

ENTRY(__mpn_add_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop
! **  V1a  **
! S2_PTR and RES_PTR share word alignment: ldd from S2, std to RES,
! while S1 is fetched one 32-bit limb at a time.
LOC(0):	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Add least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy (in the delay slot)

	ld	[S1_PTR+0],%g4		! preload first two S1 limbs ...
	addcc	SIZE,-10,SIZE		! bias: -10 = -8 (loop step) - 2 (in flight)
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2		! ... and first S2 doubleword (%g2/%g3)
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1):
	addcc	SIZE,8-2,SIZE		! undo the -8 loop bias; 2 limbs stay in flight
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	addxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	addxcc	%g4,%g2,%o4		! finish the two preloaded limbs
	addxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one straggler limb left?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! **  V1b  **
! S1_PTR and RES_PTR share alignment: swap the source pointers (addition
! is commutative) and reuse the V1a path.
	mov	S2_PTR,%g1
	mov	S1_PTR,S2_PTR
	b	LOC(0)
	mov	%g1,S1_PTR		! executed in the delay slot, before LOC(0)

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):	cmp	SIZE,1
	be	LOC(jone)		! single-limb case: no alignment work needed
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Add least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	addcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
LOC(loop2):
	ldd	[S1_PTR+0],%g2		! ldd both sources; st results one word
	ldd	[S2_PTR+0],%o4		! at a time, since RES_PTR is misaligned
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE		! undo the -8 loop bias; 2-limb tail loop next
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0		! one straggler limb left?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Add last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

END(__mpn_add_n)