1 /* Copyright (C) 2016-2021 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, see
16    <https://www.gnu.org/licenses/>.  */
17 
18 /*
19  * Copyright (c) 1985, 1989, 1993
20  *    The Regents of the University of California.  All rights reserved.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the above copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46 
47 /*
48  * Portions Copyright (c) 1993 by Digital Equipment Corporation.
49  *
50  * Permission to use, copy, modify, and distribute this software for any
51  * purpose with or without fee is hereby granted, provided that the above
52  * copyright notice and this permission notice appear in all copies, and that
53  * the name of Digital Equipment Corporation not be used in advertising or
54  * publicity pertaining to distribution of the document or software without
55  * specific, written prior permission.
56  *
57  * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
58  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
59  * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
60  * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
61  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
62  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
63  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
64  * SOFTWARE.
65  */
66 
67 /*
68  * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
69  *
70  * Permission to use, copy, modify, and distribute this software for any
71  * purpose with or without fee is hereby granted, provided that the above
72  * copyright notice and this permission notice appear in all copies.
73  *
74  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
75  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
76  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
77  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
78  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
79  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
80  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
81  * SOFTWARE.
82  */
83 
84 /*
85  * Send query to name server and wait for reply.
86  */
87 
88 #include <assert.h>
89 #include <sys/types.h>
90 #include <sys/param.h>
91 #include <sys/time.h>
92 #include <sys/socket.h>
93 #include <sys/uio.h>
94 #include <sys/poll.h>
95 
96 #include <netinet/in.h>
97 #include <arpa/nameser.h>
98 #include <arpa/inet.h>
99 #include <sys/ioctl.h>
100 
101 #include <errno.h>
102 #include <fcntl.h>
103 #include <netdb.h>
104 #include <resolv/resolv-internal.h>
105 #include <resolv/resolv_context.h>
106 #include <signal.h>
107 #include <stdlib.h>
108 #include <string.h>
109 #include <unistd.h>
110 #include <kernel-features.h>
111 #include <libc-diag.h>
112 #include <random-bits.h>
113 
/* Size used for reply buffers that are allocated with malloc when the
   caller-supplied answer buffer is too small: PACKETSZ or 65536,
   whichever is larger.  */
#if PACKETSZ > 65536
# define MAXPACKET PACKETSZ
#else
# define MAXPACKET 65536
#endif
119 
/* From ev_streams.c.  */

/* Initialize *VEC so that it describes the CNT bytes starting at BUF.
   The whole structure is first filled with the byte 0xf5, so any part
   of it that is not assigned below holds a recognizable pattern
   instead of indeterminate data.  */
static inline void
__attribute ((always_inline))
evConsIovec(void *buf, size_t cnt, struct iovec *vec) {
	memset(vec, 0xf5, sizeof (*vec));
	vec->iov_base = buf;
	vec->iov_len = cnt;
}
129 
/* From ev_timers.c.  */

/* Number of nanoseconds in one second.  */
#define BILLION 1000000000
133 
/* Store SEC seconds and NSEC nanoseconds in *RES.  The values are
   copied as given; no normalization is performed.  */
static inline void
evConsTime(struct timespec *res, time_t sec, long nsec) {
	res->tv_sec = sec;
	res->tv_nsec = nsec;
}
139 
140 static inline void
evAddTime(struct timespec * res,const struct timespec * addend1,const struct timespec * addend2)141 evAddTime(struct timespec *res, const struct timespec *addend1,
142 	  const struct timespec *addend2) {
143 	res->tv_sec = addend1->tv_sec + addend2->tv_sec;
144 	res->tv_nsec = addend1->tv_nsec + addend2->tv_nsec;
145 	if (res->tv_nsec >= BILLION) {
146 		res->tv_sec++;
147 		res->tv_nsec -= BILLION;
148 	}
149 }
150 
151 static inline void
evSubTime(struct timespec * res,const struct timespec * minuend,const struct timespec * subtrahend)152 evSubTime(struct timespec *res, const struct timespec *minuend,
153 	  const struct timespec *subtrahend) {
154        res->tv_sec = minuend->tv_sec - subtrahend->tv_sec;
155 	if (minuend->tv_nsec >= subtrahend->tv_nsec)
156 		res->tv_nsec = minuend->tv_nsec - subtrahend->tv_nsec;
157 	else {
158 		res->tv_nsec = (BILLION
159 				- subtrahend->tv_nsec + minuend->tv_nsec);
160 		res->tv_sec--;
161 	}
162 }
163 
/* Compare the time stamps A and B.  Return -1 if A is earlier than B,
   1 if A is later than B, and 0 if both are equal.  The fields are
   compared directly instead of being subtracted into a long, so the
   result is also correct if time_t is wider than long or if the
   difference of the tv_sec fields is not representable.  */
static int
evCmpTime(struct timespec a, struct timespec b) {
	if (a.tv_sec != b.tv_sec)
		return a.tv_sec < b.tv_sec ? -1 : 1;
	if (a.tv_nsec != b.tv_nsec)
		return a.tv_nsec < b.tv_nsec ? -1 : 1;
	return 0;
}
172 
173 static void
evNowTime(struct timespec * res)174 evNowTime(struct timespec *res) {
175 	__clock_gettime(CLOCK_REALTIME, res);
176 }
177 
178 
/* Shorthand for the _u._ext member of the resolver state RES points
   to.  The code in this file reads its nscount, nssocks and nsaddrs
   fields through this macro.  */
#define EXT(res) ((res)->_u._ext)
180 
181 /* Forward. */
182 
183 static int		send_vc(res_state, const u_char *, int,
184 				const u_char *, int,
185 				u_char **, int *, int *, int, u_char **,
186 				u_char **, int *, int *, int *);
187 static int		send_dg(res_state, const u_char *, int,
188 				const u_char *, int,
189 				u_char **, int *, int *, int,
190 				int *, int *, u_char **,
191 				u_char **, int *, int *, int *);
192 static int		sock_eq(struct sockaddr_in6 *, struct sockaddr_in6 *);
193 
194 /* Returns a shift value for the name server index.  Used to implement
195    RES_ROTATE.  */
196 static unsigned int
nameserver_offset(struct __res_state * statp)197 nameserver_offset (struct __res_state *statp)
198 {
199   /* If we only have one name server or rotation is disabled, return
200      offset 0 (no rotation).  */
201   unsigned int nscount = statp->nscount;
202   if (nscount <= 1 || !(statp->options & RES_ROTATE))
203     return 0;
204 
205   /* Global offset.  The lowest bit indicates whether the offset has
206      been initialized with a random value.  Use relaxed MO to access
207      global_offset because all we need is a sequence of roughly
208      sequential value.  */
209   static unsigned int global_offset;
210   unsigned int offset = atomic_fetch_add_relaxed (&global_offset, 2);
211   if ((offset & 1) == 0)
212     {
213       /* Initialization is required.  */
214       offset = random_bits ();
215       /* The lowest bit is the most random.  Preserve it.  */
216       offset <<= 1;
217 
218       /* Store the new starting value.  atomic_fetch_add_relaxed
219 	 returns the old value, so emulate that by storing the new
220 	 (incremented) value.  Concurrent initialization with
221 	 different random values is harmless.  */
222       atomic_store_relaxed (&global_offset, (offset | 1) + 2);
223     }
224 
225   /* Remove the initialization bit.  */
226   offset >>= 1;
227 
228   /* Avoid the division in the most common cases.  */
229   switch (nscount)
230     {
231     case 2:
232       return offset & 1;
233     case 3:
234       return offset % 3;
235     case 4:
236       return offset & 3;
237     default:
238       return offset % nscount;
239     }
240 }
241 
242 /* Clear the AD bit unless the trust-ad option was specified in the
243    resolver configuration.  */
244 static void
mask_ad_bit(struct resolv_context * ctx,void * buf)245 mask_ad_bit (struct resolv_context *ctx, void *buf)
246 {
247   if (!(ctx->resp->options & RES_TRUSTAD))
248     ((HEADER *) buf)->ad = 0;
249 }
250 
251 int
__res_context_send(struct resolv_context * ctx,const unsigned char * buf,int buflen,const unsigned char * buf2,int buflen2,unsigned char * ans,int anssiz,unsigned char ** ansp,unsigned char ** ansp2,int * nansp2,int * resplen2,int * ansp2_malloced)252 __res_context_send (struct resolv_context *ctx,
253 		    const unsigned char *buf, int buflen,
254 		    const unsigned char *buf2, int buflen2,
255 		    unsigned char *ans, int anssiz,
256 		    unsigned char **ansp, unsigned char **ansp2,
257 		    int *nansp2, int *resplen2, int *ansp2_malloced)
258 {
259 	struct __res_state *statp = ctx->resp;
260 	int gotsomewhere, terrno, try, v_circuit, resplen;
261 	/* On some architectures send_vc is inlined and the compiler might emit
262 	   a warning indicating 'resplen' may be used uninitialized.  Note that
263 	   the warning belongs to resplen in send_vc which is used as return
264 	   value!  There the maybe-uninitialized warning is already ignored as
265 	   it is a false-positive - see comment in send_vc.
266 	   Here the variable n is set to the return value of send_vc.
267 	   See below.  */
268 	DIAG_PUSH_NEEDS_COMMENT;
269 	DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
270 	int n;
271 	DIAG_POP_NEEDS_COMMENT;
272 
273 	if (statp->nscount == 0) {
274 		__set_errno (ESRCH);
275 		return (-1);
276 	}
277 
278 	if (anssiz < (buf2 == NULL ? 1 : 2) * HFIXEDSZ) {
279 		__set_errno (EINVAL);
280 		return (-1);
281 	}
282 
283 	v_circuit = ((statp->options & RES_USEVC)
284 		     || buflen > PACKETSZ
285 		     || buflen2 > PACKETSZ);
286 	gotsomewhere = 0;
287 	terrno = ETIMEDOUT;
288 
289 	/*
290 	 * If the ns_addr_list in the resolver context has changed, then
291 	 * invalidate our cached copy and the associated timing data.
292 	 */
293 	if (EXT(statp).nscount != 0) {
294 		int needclose = 0;
295 
296 		if (EXT(statp).nscount != statp->nscount)
297 			needclose++;
298 		else
299 			for (unsigned int ns = 0; ns < statp->nscount; ns++) {
300 				if (statp->nsaddr_list[ns].sin_family != 0
301 				    && !sock_eq((struct sockaddr_in6 *)
302 						&statp->nsaddr_list[ns],
303 						EXT(statp).nsaddrs[ns]))
304 				{
305 					needclose++;
306 					break;
307 				}
308 			}
309 		if (needclose) {
310 			__res_iclose(statp, false);
311 			EXT(statp).nscount = 0;
312 		}
313 	}
314 
315 	/*
316 	 * Maybe initialize our private copy of the ns_addr_list.
317 	 */
318 	if (EXT(statp).nscount == 0) {
319 		for (unsigned int ns = 0; ns < statp->nscount; ns++) {
320 			EXT(statp).nssocks[ns] = -1;
321 			if (statp->nsaddr_list[ns].sin_family == 0)
322 				continue;
323 			if (EXT(statp).nsaddrs[ns] == NULL)
324 				EXT(statp).nsaddrs[ns] =
325 				    malloc(sizeof (struct sockaddr_in6));
326 			if (EXT(statp).nsaddrs[ns] != NULL)
327 				memset (mempcpy(EXT(statp).nsaddrs[ns],
328 						&statp->nsaddr_list[ns],
329 						sizeof (struct sockaddr_in)),
330 					'\0',
331 					sizeof (struct sockaddr_in6)
332 					- sizeof (struct sockaddr_in));
333 			else
334 				return -1;
335 		}
336 		EXT(statp).nscount = statp->nscount;
337 	}
338 
339 	/* Name server index offset.  Used to implement
340 	   RES_ROTATE.  */
341 	unsigned int ns_offset = nameserver_offset (statp);
342 
343 	/*
344 	 * Send request, RETRY times, or until successful.
345 	 */
346 	for (try = 0; try < statp->retry; try++) {
347 	    for (unsigned ns_shift = 0; ns_shift < statp->nscount; ns_shift++)
348 	    {
349 		/* The actual name server index.  This implements
350 		   RES_ROTATE.  */
351 		unsigned int ns = ns_shift + ns_offset;
352 		if (ns >= statp->nscount)
353 			ns -= statp->nscount;
354 
355 	    same_ns:
356 		if (__glibc_unlikely (v_circuit))       {
357 			/* Use VC; at most one attempt per server. */
358 			try = statp->retry;
359 			n = send_vc(statp, buf, buflen, buf2, buflen2,
360 				    &ans, &anssiz, &terrno,
361 				    ns, ansp, ansp2, nansp2, resplen2,
362 				    ansp2_malloced);
363 			if (n < 0)
364 				return (-1);
365 			/* See comment at the declaration of n.  */
366 			DIAG_PUSH_NEEDS_COMMENT;
367 			DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
368 			if (n == 0 && (buf2 == NULL || *resplen2 == 0))
369 				goto next_ns;
370 			DIAG_POP_NEEDS_COMMENT;
371 		} else {
372 			/* Use datagrams. */
373 			n = send_dg(statp, buf, buflen, buf2, buflen2,
374 				    &ans, &anssiz, &terrno,
375 				    ns, &v_circuit, &gotsomewhere, ansp,
376 				    ansp2, nansp2, resplen2, ansp2_malloced);
377 			if (n < 0)
378 				return (-1);
379 			if (n == 0 && (buf2 == NULL || *resplen2 == 0))
380 				goto next_ns;
381 			if (v_circuit)
382 			  // XXX Check whether both requests failed or
383 			  // XXX whether one has been answered successfully
384 				goto same_ns;
385 		}
386 
387 		resplen = n;
388 
389 		/* See comment at the declaration of n.  Note: resplen = n;  */
390 		DIAG_PUSH_NEEDS_COMMENT;
391 		DIAG_IGNORE_NEEDS_COMMENT (9, "-Wmaybe-uninitialized");
392 		/* Mask the AD bit in both responses unless it is
393 		   marked trusted.  */
394 		if (resplen > HFIXEDSZ)
395 		  {
396 		    if (ansp != NULL)
397 		      mask_ad_bit (ctx, *ansp);
398 		    else
399 		      mask_ad_bit (ctx, ans);
400 		  }
401 		DIAG_POP_NEEDS_COMMENT;
402 		if (resplen2 != NULL && *resplen2 > HFIXEDSZ)
403 		  mask_ad_bit (ctx, *ansp2);
404 
405 		/*
406 		 * If we have temporarily opened a virtual circuit,
407 		 * or if we haven't been asked to keep a socket open,
408 		 * close the socket.
409 		 */
410 		if ((v_circuit && (statp->options & RES_USEVC) == 0) ||
411 		    (statp->options & RES_STAYOPEN) == 0) {
412 			__res_iclose(statp, false);
413 		}
414 		return (resplen);
415  next_ns: ;
416 	   } /*foreach ns*/
417 	} /*foreach retry*/
418 	__res_iclose(statp, false);
419 	if (!v_circuit) {
420 		if (!gotsomewhere)
421 			__set_errno (ECONNREFUSED);	/* no nameservers found */
422 		else
423 			__set_errno (ETIMEDOUT);	/* no answer obtained */
424 	} else
425 		__set_errno (terrno);
426 	return (-1);
427 }
libc_hidden_def(__res_context_send)428 libc_hidden_def (__res_context_send)
429 
430 /* Common part of res_nsend and res_send.  */
431 static int
432 context_send_common (struct resolv_context *ctx,
433 		     const unsigned char *buf, int buflen,
434 		     unsigned char *ans, int anssiz)
435 {
436   if (ctx == NULL)
437     {
438       RES_SET_H_ERRNO (&_res, NETDB_INTERNAL);
439       return -1;
440     }
441   int result = __res_context_send (ctx, buf, buflen, NULL, 0, ans, anssiz,
442 				   NULL, NULL, NULL, NULL, NULL);
443   __resolv_context_put (ctx);
444   return result;
445 }
446 
447 int
___res_nsend(res_state statp,const unsigned char * buf,int buflen,unsigned char * ans,int anssiz)448 ___res_nsend (res_state statp, const unsigned char *buf, int buflen,
449 	      unsigned char *ans, int anssiz)
450 {
451   return context_send_common
452     (__resolv_context_get_override (statp), buf, buflen, ans, anssiz);
453 }
454 versioned_symbol (libc, ___res_nsend, res_nsend, GLIBC_2_34);
455 #if OTHER_SHLIB_COMPAT (libresolv, GLIBC_2_2, GLIBC_2_34)
456 compat_symbol (libresolv, ___res_nsend, __res_nsend, GLIBC_2_2);
457 #endif
458 
459 int
___res_send(const unsigned char * buf,int buflen,unsigned char * ans,int anssiz)460 ___res_send (const unsigned char *buf, int buflen, unsigned char *ans,
461 	     int anssiz)
462 {
463   return context_send_common
464     (__resolv_context_get (), buf, buflen, ans, anssiz);
465 }
466 versioned_symbol (libc, ___res_send, res_send, GLIBC_2_34);
467 #if OTHER_SHLIB_COMPAT (libresolv, GLIBC_2_0, GLIBC_2_34)
468 compat_symbol (libresolv, ___res_send, __res_send, GLIBC_2_0);
469 #endif
470 
471 /* Private */
472 
473 /* Close the resolver structure, assign zero to *RESPLEN2 if RESPLEN2
474    is not NULL, and return zero.  */
475 static int
476 __attribute__ ((warn_unused_result))
close_and_return_error(res_state statp,int * resplen2)477 close_and_return_error (res_state statp, int *resplen2)
478 {
479   __res_iclose(statp, false);
480   if (resplen2 != NULL)
481     *resplen2 = 0;
482   return 0;
483 }
484 
485 /* The send_vc function is responsible for sending a DNS query over TCP
486    to the nameserver numbered NS from the res_state STATP i.e.
487    EXT(statp).nssocks[ns].  The function supports sending both IPv4 and
488    IPv6 queries at the same serially on the same socket.
489 
490    Please note that for TCP there is no way to disable sending both
491    queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP
492    and sends the queries serially and waits for the result after each
493    sent query.  This implementation should be corrected to honour these
494    options.
495 
496    Please also note that for TCP we send both queries over the same
497    socket one after another.  This technically violates best practice
498    since the server is allowed to read the first query, respond, and
499    then close the socket (to service another client).  If the server
500    does this, then the remaining second query in the socket data buffer
501    will cause the server to send the client an RST which will arrive
502    asynchronously and the client's OS will likely tear down the socket
503    receive buffer resulting in a potentially short read and lost
504    response data.  This will force the client to retry the query again,
505    and this process may repeat until all servers and connection resets
506    are exhausted and then the query will fail.  It's not known if this
507    happens with any frequency in real DNS server implementations.  This
508    implementation should be corrected to use two sockets by default for
509    parallel queries.
510 
511    The query stored in BUF of BUFLEN length is sent first followed by
512    the query stored in BUF2 of BUFLEN2 length.  Queries are sent
513    serially on the same socket.
514 
515    Answers to the query are stored firstly in *ANSP up to a max of
516    *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
517    is non-NULL (to indicate that modifying the answer buffer is allowed)
518    then malloc is used to allocate a new response buffer and ANSCP and
519    ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
520    are needed but ANSCP is NULL, then as much of the response as
521    possible is read into the buffer, but the results will be truncated.
522    When truncation happens because of a small answer buffer the DNS
523    packets header field TC will bet set to 1, indicating a truncated
524    message and the rest of the socket data will be read and discarded.
525 
526    Answers to the query are stored secondly in *ANSP2 up to a max of
527    *ANSSIZP2 bytes, with the actual response length stored in
528    *RESPLEN2.  If more than *ANSSIZP bytes are needed and ANSP2
529    is non-NULL (required for a second query) then malloc is used to
530    allocate a new response buffer, *ANSSIZP2 is set to the new buffer
531    size and *ANSP2_MALLOCED is set to 1.
532 
533    The ANSP2_MALLOCED argument will eventually be removed as the
534    change in buffer pointer can be used to detect the buffer has
535    changed and that the caller should use free on the new buffer.
536 
537    Note that the answers may arrive in any order from the server and
538    therefore the first and second answer buffers may not correspond to
539    the first and second queries.
540 
541    It is not supported to call this function with a non-NULL ANSP2
542    but a NULL ANSCP.  Put another way, you can call send_vc with a
543    single unmodifiable buffer or two modifiable buffers, but no other
544    combination is supported.
545 
546    It is the caller's responsibility to free the malloc allocated
547    buffers by detecting that the pointers have changed from their
548    original values i.e. *ANSCP or *ANSP2 has changed.
549 
550    If errors are encountered then *TERRNO is set to an appropriate
551    errno value and a zero result is returned for a recoverable error,
552    and a less-than zero result is returned for a non-recoverable error.
553 
554    If no errors are encountered then *TERRNO is left unmodified and
555    a the length of the first response in bytes is returned.  */
556 static int
send_vc(res_state statp,const u_char * buf,int buflen,const u_char * buf2,int buflen2,u_char ** ansp,int * anssizp,int * terrno,int ns,u_char ** anscp,u_char ** ansp2,int * anssizp2,int * resplen2,int * ansp2_malloced)557 send_vc(res_state statp,
558 	const u_char *buf, int buflen, const u_char *buf2, int buflen2,
559 	u_char **ansp, int *anssizp,
560 	int *terrno, int ns, u_char **anscp, u_char **ansp2, int *anssizp2,
561 	int *resplen2, int *ansp2_malloced)
562 {
563 	const HEADER *hp = (HEADER *) buf;
564 	const HEADER *hp2 = (HEADER *) buf2;
565 	HEADER *anhp = (HEADER *) *ansp;
566 	struct sockaddr *nsap = __res_get_nsaddr (statp, ns);
567 	int truncating, connreset, n;
568 	/* On some architectures compiler might emit a warning indicating
569 	   'resplen' may be used uninitialized.  However if buf2 == NULL
570 	   then this code won't be executed; if buf2 != NULL, then first
571 	   time round the loop recvresp1 and recvresp2 will be 0 so this
572 	   code won't be executed but "thisresplenp = &resplen;" followed
573 	   by "*thisresplenp = rlen;" will be executed so that subsequent
574 	   times round the loop resplen has been initialized.  So this is
575 	   a false-positive.
576 	 */
577 	DIAG_PUSH_NEEDS_COMMENT;
578 	DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
579 	int resplen;
580 	DIAG_POP_NEEDS_COMMENT;
581 	struct iovec iov[4];
582 	u_short len;
583 	u_short len2;
584 	u_char *cp;
585 
586 	connreset = 0;
587  same_ns:
588 	truncating = 0;
589 
590 	/* Are we still talking to whom we want to talk to? */
591 	if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
592 		struct sockaddr_in6 peer;
593 		socklen_t size = sizeof peer;
594 
595 		if (__getpeername (statp->_vcsock,
596 				   (struct sockaddr *) &peer, &size) < 0
597 		    || !sock_eq (&peer, (struct sockaddr_in6 *) nsap)) {
598 			__res_iclose(statp, false);
599 			statp->_flags &= ~RES_F_VC;
600 		}
601 	}
602 
603 	if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
604 		if (statp->_vcsock >= 0)
605 		  __res_iclose(statp, false);
606 
607 		statp->_vcsock = __socket
608 		  (nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0);
609 		if (statp->_vcsock < 0) {
610 			*terrno = errno;
611 			if (resplen2 != NULL)
612 			  *resplen2 = 0;
613 			return (-1);
614 		}
615 		__set_errno (0);
616 		if (__connect (statp->_vcsock, nsap,
617 			       nsap->sa_family == AF_INET
618 			       ? sizeof (struct sockaddr_in)
619 			       : sizeof (struct sockaddr_in6)) < 0) {
620 			*terrno = errno;
621 			return close_and_return_error (statp, resplen2);
622 		}
623 		statp->_flags |= RES_F_VC;
624 	}
625 
626 	/*
627 	 * Send length & message
628 	 */
629 	len = htons ((u_short) buflen);
630 	evConsIovec(&len, INT16SZ, &iov[0]);
631 	evConsIovec((void*)buf, buflen, &iov[1]);
632 	int niov = 2;
633 	ssize_t explen = INT16SZ + buflen;
634 	if (buf2 != NULL) {
635 		len2 = htons ((u_short) buflen2);
636 		evConsIovec(&len2, INT16SZ, &iov[2]);
637 		evConsIovec((void*)buf2, buflen2, &iov[3]);
638 		niov = 4;
639 		explen += INT16SZ + buflen2;
640 	}
641 	if (TEMP_FAILURE_RETRY (__writev (statp->_vcsock, iov, niov))
642 	    != explen) {
643 		*terrno = errno;
644 		return close_and_return_error (statp, resplen2);
645 	}
646 	/*
647 	 * Receive length & response
648 	 */
649 	int recvresp1 = 0;
650 	/* Skip the second response if there is no second query.
651 	   To do that we mark the second response as received.  */
652 	int recvresp2 = buf2 == NULL;
653 	uint16_t rlen16;
654  read_len:
655 	cp = (u_char *)&rlen16;
656 	len = sizeof(rlen16);
657 	while ((n = TEMP_FAILURE_RETRY (read(statp->_vcsock, cp,
658 					     (int)len))) > 0) {
659 		cp += n;
660 		if ((len -= n) <= 0)
661 			break;
662 	}
663 	if (n <= 0) {
664 		*terrno = errno;
665 		/*
666 		 * A long running process might get its TCP
667 		 * connection reset if the remote server was
668 		 * restarted.  Requery the server instead of
669 		 * trying a new one.  When there is only one
670 		 * server, this means that a query might work
671 		 * instead of failing.  We only allow one reset
672 		 * per query to prevent looping.
673 		 */
674 		if (*terrno == ECONNRESET && !connreset)
675 		  {
676 		    __res_iclose (statp, false);
677 		    connreset = 1;
678 		    goto same_ns;
679 		  }
680 		return close_and_return_error (statp, resplen2);
681 	}
682 	int rlen = ntohs (rlen16);
683 
684 	int *thisanssizp;
685 	u_char **thisansp;
686 	int *thisresplenp;
687 	if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
688 		/* We have not received any responses
689 		   yet or we only have one response to
690 		   receive.  */
691 		thisanssizp = anssizp;
692 		thisansp = anscp ?: ansp;
693 		assert (anscp != NULL || ansp2 == NULL);
694 		thisresplenp = &resplen;
695 	} else {
696 		thisanssizp = anssizp2;
697 		thisansp = ansp2;
698 		thisresplenp = resplen2;
699 	}
700 	anhp = (HEADER *) *thisansp;
701 
702 	*thisresplenp = rlen;
703 	/* Is the answer buffer too small?  */
704 	if (*thisanssizp < rlen) {
705 		/* If the current buffer is not the the static
706 		   user-supplied buffer then we can reallocate
707 		   it.  */
708 		if (thisansp != NULL && thisansp != ansp) {
709 			/* Always allocate MAXPACKET, callers expect
710 			   this specific size.  */
711 			u_char *newp = malloc (MAXPACKET);
712 			if (newp == NULL)
713 			  {
714 			    *terrno = ENOMEM;
715 			    return close_and_return_error (statp, resplen2);
716 			  }
717 			*thisanssizp = MAXPACKET;
718 			*thisansp = newp;
719 			if (thisansp == ansp2)
720 			  *ansp2_malloced = 1;
721 			anhp = (HEADER *) newp;
722 			/* A uint16_t can't be larger than MAXPACKET
723 			   thus it's safe to allocate MAXPACKET but
724 			   read RLEN bytes instead.  */
725 			len = rlen;
726 		} else {
727 			truncating = 1;
728 			len = *thisanssizp;
729 		}
730 	} else
731 		len = rlen;
732 
733 	if (__glibc_unlikely (len < HFIXEDSZ))       {
734 		/*
735 		 * Undersized message.
736 		 */
737 		*terrno = EMSGSIZE;
738 		return close_and_return_error (statp, resplen2);
739 	}
740 
741 	cp = *thisansp;
742 	while (len != 0 && (n = read(statp->_vcsock, (char *)cp, (int)len)) > 0){
743 		cp += n;
744 		len -= n;
745 	}
746 	if (__glibc_unlikely (n <= 0))       {
747 		*terrno = errno;
748 		return close_and_return_error (statp, resplen2);
749 	}
750 	if (__glibc_unlikely (truncating))       {
751 		/*
752 		 * Flush rest of answer so connection stays in synch.
753 		 */
754 		anhp->tc = 1;
755 		len = rlen - *thisanssizp;
756 		while (len != 0) {
757 			char junk[PACKETSZ];
758 
759 			n = read(statp->_vcsock, junk,
760 				 (len > sizeof junk) ? sizeof junk : len);
761 			if (n > 0)
762 				len -= n;
763 			else
764 				break;
765 		}
766 	}
767 	/*
768 	 * If the calling application has bailed out of
769 	 * a previous call and failed to arrange to have
770 	 * the circuit closed or the server has got
771 	 * itself confused, then drop the packet and
772 	 * wait for the correct one.
773 	 */
774 	if ((recvresp1 || hp->id != anhp->id)
775 	    && (recvresp2 || hp2->id != anhp->id))
776 		goto read_len;
777 
778 	/* Mark which reply we received.  */
779 	if (recvresp1 == 0 && hp->id == anhp->id)
780 	  recvresp1 = 1;
781 	else
782 	  recvresp2 = 1;
783 	/* Repeat waiting if we have a second answer to arrive.  */
784 	if ((recvresp1 & recvresp2) == 0)
785 		goto read_len;
786 
787 	/*
788 	 * All is well, or the error is fatal.  Signal that the
789 	 * next nameserver ought not be tried.
790 	 */
791 	return resplen;
792 }
793 
794 static int
reopen(res_state statp,int * terrno,int ns)795 reopen (res_state statp, int *terrno, int ns)
796 {
797 	if (EXT(statp).nssocks[ns] == -1) {
798 		struct sockaddr *nsap = __res_get_nsaddr (statp, ns);
799 		socklen_t slen;
800 
801 		/* only try IPv6 if IPv6 NS and if not failed before */
802 		if (nsap->sa_family == AF_INET6 && !statp->ipv6_unavail) {
803 			EXT (statp).nssocks[ns] = __socket
804 			  (PF_INET6,
805 			   SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
806 			if (EXT(statp).nssocks[ns] < 0)
807 			    statp->ipv6_unavail = errno == EAFNOSUPPORT;
808 			slen = sizeof (struct sockaddr_in6);
809 		} else if (nsap->sa_family == AF_INET) {
810 			EXT (statp).nssocks[ns] = __socket
811 			  (PF_INET,
812 			   SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
813 			slen = sizeof (struct sockaddr_in);
814 		}
815 		if (EXT(statp).nssocks[ns] < 0) {
816 			*terrno = errno;
817 			return (-1);
818 		}
819 
820 		/* Enable full ICMP error reporting for this
821 		   socket.  */
822 		if (__res_enable_icmp (nsap->sa_family,
823 				       EXT (statp).nssocks[ns]) < 0)
824 		  {
825 		    int saved_errno = errno;
826 		    __res_iclose (statp, false);
827 		    __set_errno (saved_errno);
828 		    *terrno = saved_errno;
829 		    return -1;
830 		  }
831 
832 		/*
833 		 * On a 4.3BSD+ machine (client and server,
834 		 * actually), sending to a nameserver datagram
835 		 * port with no nameserver will cause an
836 		 * ICMP port unreachable message to be returned.
837 		 * If our datagram socket is "connected" to the
838 		 * server, we get an ECONNREFUSED error on the next
839 		 * socket operation, and select returns if the
840 		 * error message is received.  We can thus detect
841 		 * the absence of a nameserver without timing out.
842 		 */
843 		/* With GCC 5.3 when compiling with -Os the compiler
844 		   emits a warning that slen may be used uninitialized,
845 		   but that is never true.  Both slen and
846 		   EXT(statp).nssocks[ns] are initialized together or
847 		   the function return -1 before control flow reaches
848 		   the call to connect with slen.  */
849 		DIAG_PUSH_NEEDS_COMMENT;
850 		DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
851 		if (__connect (EXT (statp).nssocks[ns], nsap, slen) < 0) {
852 		DIAG_POP_NEEDS_COMMENT;
853 			__res_iclose(statp, false);
854 			return (0);
855 		}
856 	}
857 
858 	return 1;
859 }
860 
861 /* The send_dg function is responsible for sending a DNS query over UDP
862    to the nameserver numbered NS from the res_state STATP i.e.
863    EXT(statp).nssocks[ns].  The function supports IPv4 and IPv6 queries
864    along with the ability to send the query in parallel for both stacks
   (default) or serially (RES_SNGLKUP).  It also supports serial lookup
866    with a close and reopen of the socket used to talk to the server
867    (RES_SNGLKUPREOP) to work around broken name servers.
868 
869    The query stored in BUF of BUFLEN length is sent first followed by
870    the query stored in BUF2 of BUFLEN2 length.  Queries are sent
   in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP).
872 
873    Answers to the query are stored firstly in *ANSP up to a max of
874    *ANSSIZP bytes.  If more than *ANSSIZP bytes are needed and ANSCP
875    is non-NULL (to indicate that modifying the answer buffer is allowed)
876    then malloc is used to allocate a new response buffer and ANSCP and
877    ANSP will both point to the new buffer.  If more than *ANSSIZP bytes
878    are needed but ANSCP is NULL, then as much of the response as
879    possible is read into the buffer, but the results will be truncated.
880    When truncation happens because of a small answer buffer the DNS
   packet's header field TC will be set to 1, indicating a truncated
882    message, while the rest of the UDP packet is discarded.
883 
884    Answers to the query are stored secondly in *ANSP2 up to a max of
885    *ANSSIZP2 bytes, with the actual response length stored in
   *RESPLEN2.  If more than *ANSSIZP2 bytes are needed and ANSP2
887    is non-NULL (required for a second query) then malloc is used to
888    allocate a new response buffer, *ANSSIZP2 is set to the new buffer
889    size and *ANSP2_MALLOCED is set to 1.
890 
891    The ANSP2_MALLOCED argument will eventually be removed as the
892    change in buffer pointer can be used to detect the buffer has
893    changed and that the caller should use free on the new buffer.
894 
895    Note that the answers may arrive in any order from the server and
896    therefore the first and second answer buffers may not correspond to
897    the first and second queries.
898 
899    It is not supported to call this function with a non-NULL ANSP2
   but a NULL ANSCP.  Put another way, you can call send_dg with a
901    single unmodifiable buffer or two modifiable buffers, but no other
902    combination is supported.
903 
904    It is the caller's responsibility to free the malloc allocated
905    buffers by detecting that the pointers have changed from their
906    original values i.e. *ANSCP or *ANSP2 has changed.
907 
908    If an answer is truncated because of UDP datagram DNS limits then
909    *V_CIRCUIT is set to 1 and the return value non-zero to indicate to
910    the caller to retry with TCP.  The value *GOTSOMEWHERE is set to 1
911    if any progress was made reading a response from the nameserver and
912    is used by the caller to distinguish between ECONNREFUSED and
913    ETIMEDOUT (the latter if *GOTSOMEWHERE is 1).
914 
915    If errors are encountered then *TERRNO is set to an appropriate
916    errno value and a zero result is returned for a recoverable error,
917    and a less-than zero result is returned for a non-recoverable error.
918 
919    If no errors are encountered then *TERRNO is left unmodified and
   the length of the first response in bytes is returned.  */
static int
send_dg(res_state statp,
	const u_char *buf, int buflen, const u_char *buf2, int buflen2,
	u_char **ansp, int *anssizp,
	int *terrno, int ns, int *v_circuit, int *gotsomewhere, u_char **anscp,
	u_char **ansp2, int *anssizp2, int *resplen2, int *ansp2_malloced)
{
	/* Headers of the two outgoing queries; their ids are compared
	   with the id of every incoming datagram below.  HP2 is only
	   dereferenced while RECVRESP2 is zero, and RECVRESP2 starts
	   out as 1 whenever BUF2 is NULL.  */
	const HEADER *hp = (HEADER *) buf;
	const HEADER *hp2 = (HEADER *) buf2;
	struct timespec now, timeout, finish;
	struct pollfd pfd[1];
	int ptimeout;
	struct sockaddr_in6 from;
	/* Length of the first response; this is the normal return value.  */
	int resplen = 0;
	int n;

	/*
	 * Compute time for the total operation.
	 * The base interval statp->retrans is shifted left by NS, divided
	 * by the number of configured servers for every server but the
	 * first, and clamped to at least one second.
	 */
	int seconds = (statp->retrans << ns);
	if (ns > 0)
		seconds /= statp->nscount;
	if (seconds <= 0)
		seconds = 1;
	/* RES_SNGLKUPREOP implies single-request mode as well.  */
	bool single_request_reopen = (statp->options & RES_SNGLKUPREOP) != 0;
	bool single_request = (((statp->options & RES_SNGLKUP) != 0)
			       | single_request_reopen);
	/* Remembered so that *GOTSOMEWHERE can be restored when the
	   whole exchange is restarted in a more conservative mode.  */
	int save_gotsomewhere = *gotsomewhere;

	int retval;
	/* Restart point used after the socket has been closed: obtain a
	   socket for server NS again through reopen.  A result <= 0 is
	   passed straight back to the caller after zeroing *RESPLEN2.  */
 retry_reopen:
	retval = reopen (statp, terrno, ns);
	if (retval <= 0)
	  {
	    if (resplen2 != NULL)
	      *resplen2 = 0;
	    return retval;
	  }
	/* Restart point that keeps the current socket: compute a fresh
	   deadline (FINISH = NOW + SECONDS) and reset the per-attempt
	   send/receive state.  */
 retry:
	evNowTime(&now);
	evConsTime(&timeout, seconds, 0);
	evAddTime(&finish, &now, &timeout);
	int need_recompute = 0;
	/* Number of queries handed to the kernel so far (0, 1 or 2).  */
	int nwritten = 0;
	int recvresp1 = 0;
	/* Skip the second response if there is no second query.
	   To do that we mark the second response as received.  */
	int recvresp2 = buf2 == NULL;
	pfd[0].fd = EXT(statp).nssocks[ns];
	/* Start by waiting until the socket is writable.  */
	pfd[0].events = POLLOUT;
	/* Main loop head: every send or receive step comes back here.  */
 wait:
	if (need_recompute) {
	/* Also entered directly by goto after EINTR/EAGAIN: refresh the
	   remaining time and give up once the deadline has passed.  */
	recompute_resend:
		evNowTime(&now);
		if (evCmpTime(finish, now) <= 0) {
		/* Shared exit for an expired deadline and for poll
		   failures other than EINTR.  */
		poll_err_out:
			return close_and_return_error (statp, resplen2);
		}
		evSubTime(&timeout, &finish, &now);
		need_recompute = 0;
	}
	/* Convert struct timespec to milliseconds.  */
	ptimeout = timeout.tv_sec * 1000 + timeout.tv_nsec / 1000000;

	/* Before anything has been written, first try a poll with a zero
	   timeout.  Only if that reports nothing (or once a query has
	   already been written) block for up to PTIMEOUT milliseconds;
	   the remaining time must then be recomputed on the next pass.  */
	n = 0;
	if (nwritten == 0)
	  n = __poll (pfd, 1, 0);
	if (__glibc_unlikely (n == 0))       {
		n = __poll (pfd, 1, ptimeout);
		need_recompute = 1;
	}
	/* The blocking poll timed out.  */
	if (n == 0) {
		if (resplen > 1 && (recvresp1 || (buf2 != NULL && recvresp2)))
		  {
		    /* There are quite a few broken name servers out
		       there which don't handle two outstanding
		       requests from the same source.  There are also
		       broken firewall settings.  If we time out after
		       having received one answer switch to the mode
		       where we send the second request only once we
		       have received the first answer.  */
		    if (!single_request)
		      {
			statp->options |= RES_SNGLKUP;
			single_request = true;
			*gotsomewhere = save_gotsomewhere;
			goto retry;
		      }
		    else if (!single_request_reopen)
		      {
			statp->options |= RES_SNGLKUPREOP;
			single_request_reopen = true;
			*gotsomewhere = save_gotsomewhere;
			__res_iclose (statp, false);
			goto retry_reopen;
		      }

		    /* Both fallback modes were already active: return
		       the one answer we have.
		       NOTE(review): *RESPLEN2 is set to 1 rather than 0
		       here, presumably as a marker for the caller that
		       the second answer is missing -- confirm against
		       the callers.  */
		    *resplen2 = 1;
		    return resplen;
		  }

		/* Plain timeout without a usable answer: flag it in
		   *GOTSOMEWHERE and return 0.  */
		*gotsomewhere = 1;
		if (resplen2 != NULL)
		  *resplen2 = 0;
		return 0;
	}
	if (n < 0) {
		/* An interrupted poll is retried with the remaining time;
		   any other poll failure ends the exchange.  */
		if (errno == EINTR)
			goto recompute_resend;

		goto poll_err_out;
	}
	/* Clear errno so that the errno tests after the send and receive
	   calls below do not see a stale value.  */
	__set_errno (0);
	if (pfd[0].revents & POLLOUT) {
		/* State of have_sendmmsg as used below: 0 = not yet
		   known, 1 = a call has been made that did not fail
		   with ENOSYS, -1 = ENOSYS was seen and plain send is
		   used from now on.  With __ASSUME_SENDMMSG it is the
		   constant 1.  */
#ifndef __ASSUME_SENDMMSG
		static int have_sendmmsg;
#else
# define have_sendmmsg 1
#endif
		/* Parallel mode with two queries and nothing written
		   yet: try to submit both datagrams in one call.  */
		if (have_sendmmsg >= 0 && nwritten == 0 && buf2 != NULL
		    && !single_request)
		  {
		    struct iovec iov =
		      { .iov_base = (void *) buf, .iov_len = buflen };
		    struct iovec iov2 =
		      { .iov_base = (void *) buf2, .iov_len = buflen2 };
		    struct mmsghdr reqs[2] =
		      {
			{
			  .msg_hdr =
			    {
			      .msg_iov = &iov,
			      .msg_iovlen = 1,
			    },
			},
			{
			  .msg_hdr =
			    {
			      .msg_iov = &iov2,
			      .msg_iovlen = 1,
			    }
			},
		      };

		    int ndg = __sendmmsg (pfd[0].fd, reqs, 2, MSG_NOSIGNAL);
		    if (__glibc_likely (ndg == 2))
		      {
			/* Both were accepted; a short write of either
			   datagram is treated as a failure.  */
			if (reqs[0].msg_len != buflen
			    || reqs[1].msg_len != buflen2)
			  goto fail_sendmmsg;

			pfd[0].events = POLLIN;
			nwritten += 2;
		      }
		    else if (ndg == 1 && reqs[0].msg_len == buflen)
		      /* Only the first query went out completely.
			 Continue as if it had been sent with send; the
			 second one follows on the next POLLOUT.  */
		      goto just_one;
		    else if (ndg < 0 && (errno == EINTR || errno == EAGAIN))
		      goto recompute_resend;
		    else
		      {
#ifndef __ASSUME_SENDMMSG
			/* First ever call: ENOSYS selects the plain
			   send fallback permanently; any other outcome
			   records that sendmmsg itself is available.  */
			if (__glibc_unlikely (have_sendmmsg == 0))
			  {
			    if (ndg < 0 && errno == ENOSYS)
			      {
				have_sendmmsg = -1;
				goto try_send;
			      }
			    have_sendmmsg = 1;
			  }
#endif

		      fail_sendmmsg:
			return close_and_return_error (statp, resplen2);
		      }
		  }
		else
		  {
		    /* One datagram at a time: the first query if nothing
		       has been written yet, otherwise the second.  */
		    ssize_t sr;
#ifndef __ASSUME_SENDMMSG
		  try_send:
#endif
		    if (nwritten != 0)
		      sr = __send (pfd[0].fd, buf2, buflen2, MSG_NOSIGNAL);
		    else
		      sr = __send (pfd[0].fd, buf, buflen, MSG_NOSIGNAL);

		    /* Anything but a complete write is an error, unless
		       errno says the call should simply be retried.  */
		    if (sr != (nwritten != 0 ? buflen2 : buflen)) {
		      if (errno == EINTR || errno == EAGAIN)
			goto recompute_resend;
		      return close_and_return_error (statp, resplen2);
		    }
		  just_one:
		    /* Keep asking for POLLOUT only if a second query
		       still has to be sent right away (parallel mode,
		       first query just written).  */
		    if (nwritten != 0 || buf2 == NULL || single_request)
		      pfd[0].events = POLLIN;
		    else
		      pfd[0].events = POLLIN | POLLOUT;
		    ++nwritten;
		  }
		goto wait;
	} else if (pfd[0].revents & POLLIN) {
		/* Buffer size, buffer pointer and length slot that the
		   incoming datagram is stored through.  */
		int *thisanssizp;
		u_char **thisansp;
		int *thisresplenp;

		if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) {
			/* We have not received any responses
			   yet or we only have one response to
			   receive.  */
			thisanssizp = anssizp;
			/* ANSCP if it is non-NULL, otherwise ANSP.  */
			thisansp = anscp ?: ansp;
			assert (anscp != NULL || ansp2 == NULL);
			thisresplenp = &resplen;
		} else {
			/* One response is already stored; this datagram
			   goes into the second answer buffer.  */
			thisanssizp = anssizp2;
			thisansp = ansp2;
			thisresplenp = resplen2;
		}

		if (*thisanssizp < MAXPACKET
		    /* If the current buffer is not the static
		       user-supplied buffer then we can reallocate
		       it.  */
		    && (thisansp != NULL && thisansp != ansp)
#ifdef FIONREAD
		    /* Is the size too small?  */
		    && (__ioctl (pfd[0].fd, FIONREAD, thisresplenp) < 0
			|| *thisanssizp < *thisresplenp)
#endif
                    ) {
			/* Always allocate MAXPACKET, callers expect
			   this specific size.  */
			u_char *newp = malloc (MAXPACKET);
			/* If the allocation fails the existing buffer
			   is kept and the datagram is read into it.  */
			if (newp != NULL) {
				*thisanssizp = MAXPACKET;
				*thisansp = newp;
				if (thisansp == ansp2)
				  *ansp2_malloced = 1;
			}
		}
		/* We could end up with truncation if anscp was NULL
		   (not allowed to change caller's buffer) and the
		   response buffer size is too small.  This isn't a
		   reliable way to detect truncation because the ioctl
		   may be an inaccurate report of the UDP message size.
		   Therefore we use this only to issue debug output.
		   To do truncation accurately with UDP we need
		   MSG_TRUNC which is only available on Linux.  We
		   can abstract out the Linux-specific feature in the
		   future to detect truncation.  */
		/* ANHP is taken after the possible reallocation so that
		   it refers to the buffer actually used by recvfrom.  */
		HEADER *anhp = (HEADER *) *thisansp;
		socklen_t fromlen = sizeof(struct sockaddr_in6);
		assert (sizeof(from) <= fromlen);
		*thisresplenp = __recvfrom (pfd[0].fd, (char *) *thisansp,
					    *thisanssizp, 0,
					    (struct sockaddr *) &from,
					    &fromlen);
		if (__glibc_unlikely (*thisresplenp <= 0))       {
			/* Retry on EINTR/EAGAIN; any other failure, or
			   an empty datagram, ends the exchange.  */
			if (errno == EINTR || errno == EAGAIN) {
				need_recompute = 1;
				goto wait;
			}
			return close_and_return_error (statp, resplen2);
		}
		*gotsomewhere = 1;
		if (__glibc_unlikely (*thisresplenp < HFIXEDSZ))       {
			/*
			 * Undersized message.
			 */
			*terrno = EMSGSIZE;
			return close_and_return_error (statp, resplen2);
		}

		/* Check for the correct header layout and a matching
		   question.  Only queries whose response is still
		   outstanding are considered.  The second query is
		   tested last, so it takes precedence if both match.  */
		int matching_query = 0; /* Default to no matching query.  */
		if (!recvresp1
		    && anhp->id == hp->id
		    && __libc_res_queriesmatch (buf, buf + buflen,
						*thisansp,
						*thisansp + *thisanssizp))
		  matching_query = 1;
		if (!recvresp2
		    && anhp->id == hp2->id
		    && __libc_res_queriesmatch (buf2, buf2 + buflen2,
						*thisansp,
						*thisansp + *thisanssizp))
		  matching_query = 2;
		if (matching_query == 0)
		  /* Spurious UDP packet.  Drop it and continue
		     waiting.  */
		  {
		    need_recompute = 1;
		    goto wait;
		  }

		/* The server answered but refuses or is unable to
		   serve the query.  */
		if (anhp->rcode == SERVFAIL ||
		    anhp->rcode == NOTIMP ||
		    anhp->rcode == REFUSED) {
		/* Also reached by goto for an empty NOERROR reply, see
		   below.  */
		next_ns:
			/* One response was accepted earlier: return it
			   and report that there is no second one.  */
			if (recvresp1 || (buf2 != NULL && recvresp2)) {
			  *resplen2 = 0;
			  return resplen;
			}
			if (buf2 != NULL)
			  {
			    /* No data from the first reply.  */
			    resplen = 0;
			    /* We are waiting for a possible second reply.  */
			    if (matching_query == 1)
			      recvresp1 = 1;
			    else
			      recvresp2 = 1;

			    goto wait;
			  }

			/* don't retry if called from dig */
			if (!statp->pfcode)
			  return close_and_return_error (statp, resplen2);
			/* With statp->pfcode set only the socket is
			   closed and control continues with the checks
			   below.  */
			__res_iclose(statp, false);
		}
		/* A NOERROR reply with no answer and no additional
		   records, with neither AA nor RA set, is handled like
		   the failure codes above.  */
		if (anhp->rcode == NOERROR && anhp->ancount == 0
		    && anhp->aa == 0 && anhp->ra == 0 && anhp->arcount == 0) {
			goto next_ns;
		}
		if (!(statp->options & RES_IGNTC) && anhp->tc) {
			/*
			 * To get the rest of answer,
			 * use TCP with same server.
			 */
			*v_circuit = 1;
			__res_iclose(statp, false);
			// XXX if we have received one reply we could
			// XXX use it and not repeat it over TCP...
			if (resplen2 != NULL)
			  *resplen2 = 0;
			return (1);
		}
		/* Mark which reply we received.  */
		if (matching_query == 1)
			recvresp1 = 1;
		else
			recvresp2 = 1;
		/* Repeat waiting if we have a second answer to arrive.  */
		if ((recvresp1 & recvresp2) == 0) {
			if (single_request) {
				/* In single-request mode the other query
				   has not been sent yet: wait for POLLOUT
				   again.  NWRITTEN is nonzero by now, so
				   the send branch transmits BUF2.  */
				pfd[0].events = POLLOUT;
				if (single_request_reopen) {
					/* Use a freshly opened socket for
					   the second query.  */
					__res_iclose (statp, false);
					retval = reopen (statp, terrno, ns);
					if (retval <= 0)
					  {
					    if (resplen2 != NULL)
					      *resplen2 = 0;
					    return retval;
					  }
					pfd[0].fd = EXT(statp).nssocks[ns];
				}
			}
			goto wait;
		}
		/* All is well.  We have received both responses (if
		   two responses were requested).  */
		return (resplen);
	} else if (pfd[0].revents & (POLLERR | POLLHUP | POLLNVAL))
	  /* Something went wrong.  We can stop trying.  */
	  return close_and_return_error (statp, resplen2);
	else {
		/* poll should not have returned > 0 in this case.  */
		abort ();
	}
}
1294 
/* Return 1 if the socket addresses A1 and A2 denote the same endpoint,
   0 otherwise.

   Addresses of the same family are compared by port and address.  When
   the families differ, one side is taken to be AF_INET and the other
   AF_INET6 (no other family is expected); they are equal if the ports
   match and the IPv6 address is an IPv4-mapped address whose last 32
   bits equal the IPv4 address.  */
static int
sock_eq(struct sockaddr_in6 *a1, struct sockaddr_in6 *a2) {
	if (a1->sin6_family != a2->sin6_family) {
		/* Mixed families: arrange for V4 to view the AF_INET side
		   and V6 the other one.  */
		struct sockaddr_in6 *v6 = a1;
		struct sockaddr_in *v4 = (struct sockaddr_in *) a2;

		if (a1->sin6_family == AF_INET) {
			v6 = a2;
			v4 = (struct sockaddr_in *) a1;
		}
		if (v6->sin6_port != v4->sin_port)
			return 0;
		if (!IN6_IS_ADDR_V4MAPPED(&v6->sin6_addr))
			return 0;
		return v6->sin6_addr.s6_addr32[3] == v4->sin_addr.s_addr;
	}

	if (a1->sin6_family == AF_INET) {
		/* Both IPv4: the sockaddr_in6 pointers really refer to
		   sockaddr_in objects.  */
		struct sockaddr_in *lhs = (struct sockaddr_in *) a1;
		struct sockaddr_in *rhs = (struct sockaddr_in *) a2;

		if (lhs->sin_port != rhs->sin_port)
			return 0;
		return lhs->sin_addr.s_addr == rhs->sin_addr.s_addr;
	}

	/* Same family and not IPv4: compare as IPv6 endpoints.  */
	if (a1->sin6_port != a2->sin6_port)
		return 0;
	return memcmp(&a1->sin6_addr, &a2->sin6_addr,
		      sizeof (struct in6_addr)) == 0;
}
1318