1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * BTRFS filesystem implementation for U-Boot
4  *
5  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
6  */
7 
8 #include <linux/kernel.h>
9 #include <malloc.h>
10 #include <memalign.h>
11 #include "btrfs.h"
12 #include "disk-io.h"
13 #include "volumes.h"
14 
15 /*
16  * Read the content of symlink inode @ino of @root, into @target.
17  * NOTE: @target will not be \0 termiated, caller should handle it properly.
18  *
19  * Return the number of read data.
20  * Return <0 for error.
21  */
btrfs_readlink(struct btrfs_root * root,u64 ino,char * target)22 int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target)
23 {
24 	struct btrfs_path path;
25 	struct btrfs_key key;
26 	struct btrfs_file_extent_item *fi;
27 	int ret;
28 
29 	key.objectid = ino;
30 	key.type = BTRFS_EXTENT_DATA_KEY;
31 	key.offset = 0;
32 	btrfs_init_path(&path);
33 
34 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
35 	if (ret < 0)
36 		return ret;
37 	if (ret > 0) {
38 		ret = -ENOENT;
39 		goto out;
40 	}
41 	fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
42 			    struct btrfs_file_extent_item);
43 	if (btrfs_file_extent_type(path.nodes[0], fi) !=
44 	    BTRFS_FILE_EXTENT_INLINE) {
45 		ret = -EUCLEAN;
46 		error("Extent for symlink %llu must be INLINE type!", ino);
47 		goto out;
48 	}
49 	if (btrfs_file_extent_compression(path.nodes[0], fi) !=
50 	    BTRFS_COMPRESS_NONE) {
51 		ret = -EUCLEAN;
52 		error("Extent for symlink %llu must not be compressed!", ino);
53 		goto out;
54 	}
55 	if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >=
56 	    root->fs_info->sectorsize) {
57 		ret = -EUCLEAN;
58 		error("Symlink %llu extent data too large (%llu)!\n",
59 			ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi));
60 		goto out;
61 	}
62 	read_extent_buffer(path.nodes[0], target,
63 			btrfs_file_extent_inline_start(fi),
64 			btrfs_file_extent_ram_bytes(path.nodes[0], fi));
65 	ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi);
66 out:
67 	btrfs_release_path(&path);
68 	return ret;
69 }
70 
lookup_root_ref(struct btrfs_fs_info * fs_info,u64 rootid,u64 * root_ret,u64 * dir_ret)71 static int lookup_root_ref(struct btrfs_fs_info *fs_info,
72 			   u64 rootid, u64 *root_ret, u64 *dir_ret)
73 {
74 	struct btrfs_root *root = fs_info->tree_root;
75 	struct btrfs_root_ref *root_ref;
76 	struct btrfs_path path;
77 	struct btrfs_key key;
78 	int ret;
79 
80 	btrfs_init_path(&path);
81 	key.objectid = rootid;
82 	key.type = BTRFS_ROOT_BACKREF_KEY;
83 	key.offset = (u64)-1;
84 
85 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
86 	if (ret < 0)
87 		return ret;
88 	/* Should not happen */
89 	if (ret == 0) {
90 		ret = -EUCLEAN;
91 		goto out;
92 	}
93 	ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY);
94 	if (ret < 0)
95 		goto out;
96 	if (ret > 0) {
97 		ret = -ENOENT;
98 		goto out;
99 	}
100 	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
101 	root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0],
102 				  struct btrfs_root_ref);
103 	*root_ret = key.offset;
104 	*dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref);
105 out:
106 	btrfs_release_path(&path);
107 	return ret;
108 }
109 
110 /*
111  * To get the parent inode of @ino of @root.
112  *
113  * @root_ret and @ino_ret will be filled.
114  *
115  * NOTE: This function is not reliable. It can only get one parent inode.
116  * The get the proper parent inode, we need a full VFS inodes stack to
117  * resolve properly.
118  */
get_parent_inode(struct btrfs_root * root,u64 ino,struct btrfs_root ** root_ret,u64 * ino_ret)119 static int get_parent_inode(struct btrfs_root *root, u64 ino,
120 			    struct btrfs_root **root_ret, u64 *ino_ret)
121 {
122 	struct btrfs_fs_info *fs_info = root->fs_info;
123 	struct btrfs_path path;
124 	struct btrfs_key key;
125 	int ret;
126 
127 	if (ino == BTRFS_FIRST_FREE_OBJECTID) {
128 		u64 parent_root = -1;
129 
130 		/* It's top level already, no more parent */
131 		if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) {
132 			*root_ret = fs_info->fs_root;
133 			*ino_ret = BTRFS_FIRST_FREE_OBJECTID;
134 			return 0;
135 		}
136 
137 		ret = lookup_root_ref(fs_info, root->root_key.objectid,
138 				      &parent_root, ino_ret);
139 		if (ret < 0)
140 			return ret;
141 
142 		key.objectid = parent_root;
143 		key.type = BTRFS_ROOT_ITEM_KEY;
144 		key.offset = (u64)-1;
145 		*root_ret = btrfs_read_fs_root(fs_info, &key);
146 		if (IS_ERR(*root_ret))
147 			return PTR_ERR(*root_ret);
148 
149 		return 0;
150 	}
151 
152 	btrfs_init_path(&path);
153 	key.objectid = ino;
154 	key.type = BTRFS_INODE_REF_KEY;
155 	key.offset = (u64)-1;
156 
157 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
158 	if (ret < 0)
159 		return ret;
160 	/* Should not happen */
161 	if (ret == 0) {
162 		ret = -EUCLEAN;
163 		goto out;
164 	}
165 	ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY);
166 	if (ret < 0)
167 		goto out;
168 	if (ret > 0) {
169 		ret = -ENOENT;
170 		goto out;
171 	}
172 	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
173 	*root_ret = root;
174 	*ino_ret = key.offset;
175 out:
176 	btrfs_release_path(&path);
177 	return ret;
178 }
179 
next_length(const char * path)180 static inline int next_length(const char *path)
181 {
182 	int res = 0;
183 	while (*path != '\0' && *path != '/') {
184 		++res;
185 		++path;
186 		if (res > BTRFS_NAME_LEN)
187 			break;
188 	}
189 	return res;
190 }
191 
skip_current_directories(const char * cur)192 static inline const char *skip_current_directories(const char *cur)
193 {
194 	while (1) {
195 		if (cur[0] == '/')
196 			++cur;
197 		else if (cur[0] == '.' && cur[1] == '/')
198 			cur += 2;
199 		else
200 			break;
201 	}
202 
203 	return cur;
204 }
205 
206 /*
207  * Resolve one filename of @ino of @root.
208  *
209  * key_ret:	The child key (either INODE_ITEM or ROOT_ITEM type)
210  * type_ret:	BTRFS_FT_* of the child inode.
211  *
212  * Return 0 with above members filled.
213  * Return <0 for error.
214  */
resolve_one_filename(struct btrfs_root * root,u64 ino,const char * name,int namelen,struct btrfs_key * key_ret,u8 * type_ret)215 static int resolve_one_filename(struct btrfs_root *root, u64 ino,
216 				const char *name, int namelen,
217 				struct btrfs_key *key_ret, u8 *type_ret)
218 {
219 	struct btrfs_dir_item *dir_item;
220 	struct btrfs_path path;
221 	int ret = 0;
222 
223 	btrfs_init_path(&path);
224 
225 	dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name,
226 					 namelen, 0);
227 	if (IS_ERR(dir_item)) {
228 		ret = PTR_ERR(dir_item);
229 		goto out;
230 	}
231 
232 	btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
233 	*type_ret = btrfs_dir_type(path.nodes[0], dir_item);
234 out:
235 	btrfs_release_path(&path);
236 	return ret;
237 }
238 
239 /*
240  * Resolve a full path @filename. The start point is @ino of @root.
241  *
242  * The result will be filled into @root_ret, @ino_ret and @type_ret.
243  */
btrfs_lookup_path(struct btrfs_root * root,u64 ino,const char * filename,struct btrfs_root ** root_ret,u64 * ino_ret,u8 * type_ret,int symlink_limit)244 int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
245 			struct btrfs_root **root_ret, u64 *ino_ret,
246 			u8 *type_ret, int symlink_limit)
247 {
248 	struct btrfs_fs_info *fs_info = root->fs_info;
249 	struct btrfs_root *next_root;
250 	struct btrfs_key key;
251 	const char *cur = filename;
252 	u64 next_ino;
253 	u8 next_type;
254 	u8 type = BTRFS_FT_UNKNOWN;
255 	int len;
256 	int ret = 0;
257 
258 	/* If the path is absolute path, also search from fs root */
259 	if (*cur == '/') {
260 		root = fs_info->fs_root;
261 		ino = btrfs_root_dirid(&root->root_item);
262 		type = BTRFS_FT_DIR;
263 	}
264 
265 	while (*cur != '\0') {
266 		cur = skip_current_directories(cur);
267 
268 		len = next_length(cur);
269 		if (len > BTRFS_NAME_LEN) {
270 			error("%s: Name too long at \"%.*s\"", __func__,
271 			       BTRFS_NAME_LEN, cur);
272 			return -ENAMETOOLONG;
273 		}
274 
275 		if (len == 1 && cur[0] == '.')
276 			break;
277 
278 		if (len == 2 && cur[0] == '.' && cur[1] == '.') {
279 			/* Go one level up */
280 			ret = get_parent_inode(root, ino, &next_root, &next_ino);
281 			if (ret < 0)
282 				return ret;
283 			root = next_root;
284 			ino = next_ino;
285 			goto next;
286 		}
287 
288 		if (!*cur)
289 			break;
290 
291 		ret = resolve_one_filename(root, ino, cur, len, &key, &type);
292 		if (ret < 0)
293 			return ret;
294 
295 		if (key.type == BTRFS_ROOT_ITEM_KEY) {
296 			/* Child inode is a subvolume */
297 
298 			next_root = btrfs_read_fs_root(fs_info, &key);
299 			if (IS_ERR(next_root))
300 				return PTR_ERR(next_root);
301 			root = next_root;
302 			ino = btrfs_root_dirid(&root->root_item);
303 		} else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
304 			/* Child inode is a symlink */
305 
306 			char *target;
307 
308 			if (symlink_limit == 0) {
309 				error("%s: Too much symlinks!", __func__);
310 				return -EMLINK;
311 			}
312 			target = malloc(fs_info->sectorsize);
313 			if (!target)
314 				return -ENOMEM;
315 			ret = btrfs_readlink(root, key.objectid, target);
316 			if (ret < 0) {
317 				free(target);
318 				return ret;
319 			}
320 			target[ret] = '\0';
321 
322 			ret = btrfs_lookup_path(root, ino, target, &next_root,
323 						&next_ino, &next_type,
324 						symlink_limit);
325 			if (ret < 0)
326 				return ret;
327 			root = next_root;
328 			ino = next_ino;
329 			type = next_type;
330 		} else {
331 			/* Child inode is an inode */
332 			ino = key.objectid;
333 		}
334 next:
335 		cur += len;
336 	}
337 
338 	/* We haven't found anything, but still get no error? */
339 	if (type == BTRFS_FT_UNKNOWN && !ret)
340 		ret = -EUCLEAN;
341 
342 	if (!ret) {
343 		*root_ret = root;
344 		*ino_ret = ino;
345 		*type_ret = type;
346 	}
347 
348 	return ret;
349 }
350 
351 /*
352  * Read out inline extent.
353  *
354  * Since inline extent should only exist for offset 0, no need for extra
355  * parameters.
356  * Truncating should be handled by the caller.
357  *
358  * Return the number of bytes read.
359  * Return <0 for error.
360  */
btrfs_read_extent_inline(struct btrfs_path * path,struct btrfs_file_extent_item * fi,char * dest)361 int btrfs_read_extent_inline(struct btrfs_path *path,
362 			     struct btrfs_file_extent_item *fi, char *dest)
363 {
364 	struct extent_buffer *leaf = path->nodes[0];
365 	int slot = path->slots[0];
366 	char *cbuf = NULL;
367 	char *dbuf = NULL;
368 	u32 csize;
369 	u32 dsize;
370 	int ret;
371 
372 	csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot));
373 	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
374 		/* Uncompressed, just read it out */
375 		read_extent_buffer(leaf, dest,
376 				btrfs_file_extent_inline_start(fi),
377 				csize);
378 		return csize;
379 	}
380 
381 	/* Compressed extent, prepare the compressed and data buffer */
382 	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
383 	cbuf = malloc(csize);
384 	dbuf = malloc(dsize);
385 	if (!cbuf || !dbuf) {
386 		ret = -ENOMEM;
387 		goto out;
388 	}
389 	read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi),
390 			   csize);
391 	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi),
392 			       cbuf, csize, dbuf, dsize);
393 	if (ret < 0 || ret != dsize) {
394 		ret = -EIO;
395 		goto out;
396 	}
397 	memcpy(dest, dbuf, dsize);
398 	ret = dsize;
399 out:
400 	free(cbuf);
401 	free(dbuf);
402 	return ret;
403 }
404 
405 /*
406  * Read out regular extent.
407  *
408  * Truncating should be handled by the caller.
409  *
410  * @offset and @len should not cross the extent boundary.
411  * Return the number of bytes read.
412  * Return <0 for error.
413  */
btrfs_read_extent_reg(struct btrfs_path * path,struct btrfs_file_extent_item * fi,u64 offset,int len,char * dest)414 int btrfs_read_extent_reg(struct btrfs_path *path,
415 			  struct btrfs_file_extent_item *fi, u64 offset,
416 			  int len, char *dest)
417 {
418 	struct extent_buffer *leaf = path->nodes[0];
419 	struct btrfs_fs_info *fs_info = leaf->fs_info;
420 	struct btrfs_key key;
421 	u64 extent_num_bytes;
422 	u64 disk_bytenr;
423 	u64 read;
424 	char *cbuf = NULL;
425 	char *dbuf = NULL;
426 	u32 csize;
427 	u32 dsize;
428 	bool finished = false;
429 	int num_copies;
430 	int i;
431 	int slot = path->slots[0];
432 	int ret;
433 
434 	btrfs_item_key_to_cpu(leaf, &key, slot);
435 	extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
436 	ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) &&
437 	       IS_ALIGNED(len, fs_info->sectorsize));
438 	ASSERT(offset >= key.offset &&
439 	       offset + len <= key.offset + extent_num_bytes);
440 
441 	/* Preallocated or hole , fill @dest with zero */
442 	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC ||
443 	    btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
444 		memset(dest, 0, len);
445 		return len;
446 	}
447 
448 	if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
449 		u64 logical;
450 
451 		logical = btrfs_file_extent_disk_bytenr(leaf, fi) +
452 			  btrfs_file_extent_offset(leaf, fi) +
453 			  offset - key.offset;
454 		read = len;
455 
456 		num_copies = btrfs_num_copies(fs_info, logical, len);
457 		for (i = 1; i <= num_copies; i++) {
458 			ret = read_extent_data(fs_info, dest, logical, &read, i);
459 			if (ret < 0 || read != len)
460 				continue;
461 			finished = true;
462 			break;
463 		}
464 		if (!finished)
465 			return -EIO;
466 		return len;
467 	}
468 
469 	csize = btrfs_file_extent_disk_num_bytes(leaf, fi);
470 	dsize = btrfs_file_extent_ram_bytes(leaf, fi);
471 	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
472 	num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize);
473 
474 	cbuf = malloc_cache_aligned(csize);
475 	dbuf = malloc_cache_aligned(dsize);
476 	if (!cbuf || !dbuf) {
477 		ret = -ENOMEM;
478 		goto out;
479 	}
480 	/* For compressed extent, we must read the whole on-disk extent */
481 	for (i = 1; i <= num_copies; i++) {
482 		read = csize;
483 		ret = read_extent_data(fs_info, cbuf, disk_bytenr,
484 				       &read, i);
485 		if (ret < 0 || read != csize)
486 			continue;
487 		finished = true;
488 		break;
489 	}
490 	if (!finished) {
491 		ret = -EIO;
492 		goto out;
493 	}
494 
495 	ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf,
496 			       csize, dbuf, dsize);
497 	if (ret != dsize) {
498 		ret = -EIO;
499 		goto out;
500 	}
501 	/* Then copy the needed part */
502 	memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len);
503 	ret = len;
504 out:
505 	free(cbuf);
506 	free(dbuf);
507 	return ret;
508 }
509 
510 /*
511  * Get the first file extent that covers bytenr @file_offset.
512  *
513  * @file_offset must be aligned to sectorsize.
514  *
515  * return 0 for found, and path points to the file extent.
516  * return >0 for not found, and fill @next_offset.
517  * @next_offset can be 0 if there is no next file extent.
518  * return <0 for error.
519  */
lookup_data_extent(struct btrfs_root * root,struct btrfs_path * path,u64 ino,u64 file_offset,u64 * next_offset)520 static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path,
521 			      u64 ino, u64 file_offset, u64 *next_offset)
522 {
523 	struct btrfs_key key;
524 	struct btrfs_file_extent_item *fi;
525 	u8 extent_type;
526 	int ret = 0;
527 
528 	ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize));
529 	key.objectid = ino;
530 	key.type = BTRFS_EXTENT_DATA_KEY;
531 	key.offset = file_offset;
532 
533 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
534 	/* Error or we're already at the file extent */
535 	if (ret <= 0)
536 		return ret;
537 	if (ret > 0) {
538 		/* Check previous file extent */
539 		ret = btrfs_previous_item(root, path, ino,
540 					  BTRFS_EXTENT_DATA_KEY);
541 		if (ret < 0)
542 			return ret;
543 		if (ret > 0)
544 			goto check_next;
545 	}
546 	/* Now the key.offset must be smaller than @file_offset */
547 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
548 	if (key.objectid != ino ||
549 	    key.type != BTRFS_EXTENT_DATA_KEY)
550 		goto check_next;
551 
552 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
553 			    struct btrfs_file_extent_item);
554 	extent_type = btrfs_file_extent_type(path->nodes[0], fi);
555 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
556 		if (file_offset == 0)
557 			return 0;
558 		/* Inline extent should be the only extent, no next extent. */
559 		*next_offset = 0;
560 		return 1;
561 	}
562 
563 	/* This file extent covers @file_offset */
564 	if (key.offset <= file_offset && key.offset +
565 	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
566 		return 0;
567 check_next:
568 	ret = btrfs_next_item(root, path);
569 	if (ret < 0)
570 		return ret;
571 	if (ret > 0) {
572 		*next_offset = 0;
573 		return 1;
574 	}
575 
576 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
577 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
578 			    struct btrfs_file_extent_item);
579 	/* Next next data extent */
580 	if (key.objectid != ino ||
581 	    key.type != BTRFS_EXTENT_DATA_KEY) {
582 		*next_offset = 0;
583 		return 1;
584 	}
585 	/* Current file extent already beyond @file_offset */
586 	if (key.offset > file_offset) {
587 		*next_offset = key.offset;
588 		return 1;
589 	}
590 	/* This file extent covers @file_offset */
591 	if (key.offset <= file_offset && key.offset +
592 	    btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
593 		return 0;
594 	/* This file extent ends before @file_offset, check next */
595 	ret = btrfs_next_item(root, path);
596 	if (ret < 0)
597 		return ret;
598 	if (ret > 0) {
599 		*next_offset = 0;
600 		return 1;
601 	}
602 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
603 	if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) {
604 		*next_offset = 0;
605 		return 1;
606 	}
607 	*next_offset = key.offset;
608 	return 1;
609 }
610 
read_and_truncate_page(struct btrfs_path * path,struct btrfs_file_extent_item * fi,int start,int len,char * dest)611 static int read_and_truncate_page(struct btrfs_path *path,
612 				  struct btrfs_file_extent_item *fi,
613 				  int start, int len, char *dest)
614 {
615 	struct extent_buffer *leaf = path->nodes[0];
616 	struct btrfs_fs_info *fs_info = leaf->fs_info;
617 	u64 aligned_start = round_down(start, fs_info->sectorsize);
618 	u8 extent_type;
619 	char *buf;
620 	int page_off = start - aligned_start;
621 	int page_len = fs_info->sectorsize - page_off;
622 	int ret;
623 
624 	ASSERT(start + len <= aligned_start + fs_info->sectorsize);
625 	buf = malloc_cache_aligned(fs_info->sectorsize);
626 	if (!buf)
627 		return -ENOMEM;
628 
629 	extent_type = btrfs_file_extent_type(leaf, fi);
630 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
631 		ret = btrfs_read_extent_inline(path, fi, buf);
632 		memcpy(dest, buf + page_off, min(page_len, ret));
633 		free(buf);
634 		return len;
635 	}
636 
637 	ret = btrfs_read_extent_reg(path, fi,
638 			round_down(start, fs_info->sectorsize),
639 			fs_info->sectorsize, buf);
640 	if (ret < 0) {
641 		free(buf);
642 		return ret;
643 	}
644 	memcpy(dest, buf + page_off, page_len);
645 	free(buf);
646 	return len;
647 }
648 
btrfs_file_read(struct btrfs_root * root,u64 ino,u64 file_offset,u64 len,char * dest)649 int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
650 		    char *dest)
651 {
652 	struct btrfs_fs_info *fs_info = root->fs_info;
653 	struct btrfs_file_extent_item *fi;
654 	struct btrfs_path path;
655 	struct btrfs_key key;
656 	u64 aligned_start = round_down(file_offset, fs_info->sectorsize);
657 	u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize);
658 	u64 next_offset;
659 	u64 cur = aligned_start;
660 	int ret = 0;
661 
662 	btrfs_init_path(&path);
663 
664 	/* Set the whole dest all zero, so we won't need to bother holes */
665 	memset(dest, 0, len);
666 
667 	/* Read out the leading unaligned part */
668 	if (aligned_start != file_offset) {
669 		ret = lookup_data_extent(root, &path, ino, aligned_start,
670 					 &next_offset);
671 		if (ret < 0)
672 			goto out;
673 		if (ret == 0) {
674 			/* Read the unaligned part out*/
675 			fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
676 					struct btrfs_file_extent_item);
677 			ret = read_and_truncate_page(&path, fi, file_offset,
678 					round_up(file_offset, fs_info->sectorsize) -
679 					file_offset, dest);
680 			if (ret < 0)
681 				goto out;
682 			cur += fs_info->sectorsize;
683 		} else {
684 			/* The whole file is a hole */
685 			if (!next_offset) {
686 				memset(dest, 0, len);
687 				return len;
688 			}
689 			cur = next_offset;
690 		}
691 	}
692 
693 	/* Read the aligned part */
694 	while (cur < aligned_end) {
695 		u64 extent_num_bytes;
696 		u8 type;
697 
698 		btrfs_release_path(&path);
699 		ret = lookup_data_extent(root, &path, ino, cur, &next_offset);
700 		if (ret < 0)
701 			goto out;
702 		if (ret > 0) {
703 			/* No next, direct exit */
704 			if (!next_offset) {
705 				ret = 0;
706 				goto out;
707 			}
708 		}
709 		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
710 				    struct btrfs_file_extent_item);
711 		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
712 		type = btrfs_file_extent_type(path.nodes[0], fi);
713 		if (type == BTRFS_FILE_EXTENT_INLINE) {
714 			ret = btrfs_read_extent_inline(&path, fi, dest);
715 			goto out;
716 		}
717 		/* Skip holes, as we have zeroed the dest */
718 		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
719 		    btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) {
720 			cur = key.offset + btrfs_file_extent_num_bytes(
721 					path.nodes[0], fi);
722 			continue;
723 		}
724 
725 		/* Read the remaining part of the extent */
726 		extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0],
727 							       fi);
728 		ret = btrfs_read_extent_reg(&path, fi, cur,
729 				min(extent_num_bytes, aligned_end - cur),
730 				dest + cur - file_offset);
731 		if (ret < 0)
732 			goto out;
733 		cur += min(extent_num_bytes, aligned_end - cur);
734 	}
735 
736 	/* Read the tailing unaligned part*/
737 	if (file_offset + len != aligned_end) {
738 		btrfs_release_path(&path);
739 		ret = lookup_data_extent(root, &path, ino, aligned_end,
740 					 &next_offset);
741 		/* <0 is error, >0 means no extent */
742 		if (ret)
743 			goto out;
744 		fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
745 				    struct btrfs_file_extent_item);
746 		ret = read_and_truncate_page(&path, fi, aligned_end,
747 				file_offset + len - aligned_end,
748 				dest + aligned_end - file_offset);
749 	}
750 out:
751 	btrfs_release_path(&path);
752 	if (ret < 0)
753 		return ret;
754 	return len;
755 }
756