1 /* memcopy.h -- definitions for memory copy functions.  Generic C version.
2    Copyright (C) 1991-2021 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #ifndef _MEMCOPY_H
20 #define _MEMCOPY_H	1
21 
22 /* The strategy of the memory functions is:
23 
24      1. Copy bytes until the destination pointer is aligned.
25 
26      2. Copy words in unrolled loops.  If the source and destination
27      are not aligned in the same way, use word memory operations,
28      but shift and merge two read words before writing.
29 
30      3. Copy the few remaining bytes.
31 
32    This is fast on processors that have at least 10 registers for
33    allocation by GCC, and that can access memory at reg+const in one
34    instruction.
35 
36    I made an "exhaustive" test of this memmove when I wrote it,
37    exhaustive in the sense that I tried all alignment and length
38    combinations, with and without overlap.  */
39 
40 #include <sys/cdefs.h>
41 #include <endian.h>
42 #include <pagecopy.h>
43 
44 /* The macros defined in this file are:
45 
46    BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
47 
48    BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
49 
50    WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
51 
52    WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
53 
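   MERGE(old_word, sh_1, new_word, sh_2)
     Combine two words into one: OLD_WORD is shifted by SH_1 bits and
     NEW_WORD by SH_2 bits, in opposite directions (which direction
     depends on the byte order), and the two results are ORed together.
     [I fail to understand.  I feel stupid.  --roland]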
56 */
57 
/* Word type used for the aligned memory operations.  Normally this
   should be the widest type that a single load and a single store can
   handle.  */
#define op_t	unsigned long
/* Size in bytes of one op_t word.  */
#define OPSIZ	(sizeof (op_t))

/* Element type used for the unaligned, byte-at-a-time operations.  */
typedef unsigned char byte;
66 
/* MERGE (w0, sh_1, w1, sh_2) ORs together W0 shifted by SH_1 bits and W1
   shifted by SH_2 bits.  The two words are always shifted in opposite
   directions; which of them moves towards the low-order end is selected
   by the byte order of the target.  */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define MERGE(first, sh_first, second, sh_second) \
  (((first) >> (sh_first)) | ((second) << (sh_second)))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define MERGE(first, sh_first, second, sh_second) \
  (((first) << (sh_first)) | ((second) >> (sh_second)))
#endif
73 
/* BYTE_COPY_FWD (dst_bp, src_bp, nbytes) copies exactly NBYTES bytes, one
   byte at a time and towards larger addresses, from address SRC_BP to
   address DST_BP.  Nothing is assumed about the alignment of either
   address.  DST_BP and SRC_BP must be modifiable lvalues; both are left
   advanced by NBYTES.  NBYTES is evaluated once.  */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)                                 \
  do                                                                          \
    {                                                                         \
      size_t __todo;                                                          \
      for (__todo = (nbytes); __todo != 0; --__todo)                          \
        {                                                                     \
          byte __b = *(byte *) src_bp;                                        \
          src_bp += 1;                                                        \
          *(byte *) dst_bp = __b;                                             \
          dst_bp += 1;                                                        \
        }                                                                     \
    } while (0)
89 
/* BYTE_COPY_BWD (dst_ep, src_ep, nbytes) copies exactly NBYTES bytes, one
   byte at a time and towards smaller addresses.  The first byte moved is
   the one immediately below SRC_EP, and it is stored immediately below
   DST_EP.  Nothing is assumed about the alignment of either address.
   DST_EP and SRC_EP must be modifiable lvalues; both are left decreased
   by NBYTES.  NBYTES is evaluated once.  */
#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes)                                 \
  do                                                                          \
    {                                                                         \
      size_t __todo;                                                          \
      for (__todo = (nbytes); __todo != 0; --__todo)                          \
        {                                                                     \
          byte __b;                                                           \
          src_ep -= 1;                                                        \
          __b = *(byte *) src_ep;                                             \
          dst_ep -= 1;                                                        \
          *(byte *) dst_ep = __b;                                             \
        }                                                                     \
    } while (0)
108 
109 /* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
110    the assumption that DST_BP is aligned on an OPSIZ multiple.  If
111    not all bytes could be easily copied, store remaining number of bytes
112    in NBYTES_LEFT, otherwise store 0.  */
113 extern void _wordcopy_fwd_aligned (long int, long int, size_t)
114   attribute_hidden __THROW;
115 extern void _wordcopy_fwd_dest_aligned (long int, long int, size_t)
116   attribute_hidden __THROW;
117 #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		      \
118   do									      \
119     {									      \
120       if (src_bp % OPSIZ == 0)						      \
121 	_wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);	      \
122       else								      \
123 	_wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);	      \
124       src_bp += (nbytes) & -OPSIZ;					      \
125       dst_bp += (nbytes) & -OPSIZ;					      \
126       (nbytes_left) = (nbytes) % OPSIZ;					      \
127     } while (0)
128 
129 /* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
130    beginning at the words (of type op_t) right before the pointers and
131    continuing towards smaller addresses.  May take advantage of that
132    DST_END_PTR is aligned on an OPSIZ multiple.  If not all bytes could be
133    easily copied, store remaining number of bytes in NBYTES_REMAINING,
134    otherwise store 0.  */
135 extern void _wordcopy_bwd_aligned (long int, long int, size_t)
136   attribute_hidden __THROW;
137 extern void _wordcopy_bwd_dest_aligned (long int, long int, size_t)
138   attribute_hidden __THROW;
139 #define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes)		      \
140   do									      \
141     {									      \
142       if (src_ep % OPSIZ == 0)						      \
143 	_wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ);	      \
144       else								      \
145 	_wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ);	      \
146       src_ep -= (nbytes) & -OPSIZ;					      \
147       dst_ep -= (nbytes) & -OPSIZ;					      \
148       (nbytes_left) = (nbytes) % OPSIZ;					      \
149     } while (0)
150 
/* PAGE_COPY_FWD_MAYBE (dstp, srcp, nbytes_left, nbytes) is invoked like
   WORD_COPY_FWD et al.; the pointers should be at least word aligned.

   pagecopy.h must define PAGE_COPY_THRESHOLD, at least to 0.  When it is
   0 this macro expands to nothing.  When it is non-zero, pagecopy.h must
   also define PAGE_COPY_FWD and PAGE_SIZE, and the macro then checks
   whether virtual copying by pages can and should be done: NBYTES has to
   reach the threshold and DSTP and SRCP have to sit at the same offset
   within their pages.  If so, the words in front of the first page
   boundary of DSTP are moved with WORD_COPY_FWD (NBYTES is reduced
   accordingly) and the rest is handed to PAGE_COPY_FWD.  */
#if !PAGE_COPY_THRESHOLD

# define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes) /* nada */

#else

# include <assert.h>

/* The page size is always a power of two, so the offset within a page
   can be taken with a mask instead of a modulo division.  */
# define PAGE_OFFSET(n)	((n) & (PAGE_SIZE - 1))

# define PAGE_COPY_FWD_MAYBE(dstp, srcp, nbytes_left, nbytes)                 \
  do                                                                          \
    {                                                                         \
      /* Only worthwhile past the threshold, and only possible when	      \
         source and destination share the same in-page offset.  */           \
      if (PAGE_OFFSET ((dstp) - (srcp)) == 0                                  \
          && (nbytes) >= PAGE_COPY_THRESHOLD)                                 \
        {                                                                     \
          size_t __nbytes_before = PAGE_OFFSET (-(dstp));                     \
          if (__nbytes_before != 0)                                           \
            {                                                                 \
              /* Move the words in front of the first page boundary.  */     \
              WORD_COPY_FWD (dstp, srcp, nbytes_left, __nbytes_before);       \
              assert (nbytes_left == 0);                                      \
              nbytes -= __nbytes_before;                                      \
            }                                                                 \
          PAGE_COPY_FWD (dstp, srcp, nbytes_left, nbytes);                    \
        }                                                                     \
    } while (0)

#endif
192 
/* Threshold value that decides when the unrolled loops are entered.  */
#define OP_T_THRES 16

/* Non-zero only if memcpy may be used for a forward-copying memmove whose
   source and destination overlap.  The default is 0, since memcpy
   implementations are in general not safe for overlapping addresses.  */
#define MEMCPY_OK_FOR_FWD_MEMMOVE 0
200 
201 #endif /* memcopy.h */
202