ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA
Switch to the iomap_seek_hole and iomap_seek_data helpers for implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that isn't needed any more. Note that with this patch ext4 will now always depend on the iomap code instead of only when CONFIG_DAX is enabled, and it requires adding a call into the extent status tree for iomap_begin as well to properly deal with delalloc extents. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz> [More fixes and cleanups by Andreas]
This commit is contained in:
parent
7046ae3532
commit
545052e9e3
|
@ -37,6 +37,7 @@ config EXT4_FS
|
|||
select CRC16
|
||||
select CRYPTO
|
||||
select CRYPTO_CRC32C
|
||||
select FS_IOMAP
|
||||
help
|
||||
This is the next generation of the ext3 filesystem.
|
||||
|
||||
|
|
|
@ -2515,9 +2515,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
|
|||
int used, int quota_claim);
|
||||
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
|
||||
ext4_fsblk_t pblk, ext4_lblk_t len);
|
||||
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
unsigned int map_len,
|
||||
struct extent_status *result);
|
||||
|
||||
/* indirect.c */
|
||||
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
||||
|
|
263
fs/ext4/file.c
263
fs/ext4/file.c
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include <linux/time.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/iomap.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/path.h>
|
||||
#include <linux/dax.h>
|
||||
|
@ -437,248 +438,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
|
|||
return dquot_file_open(inode, filp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we use ext4_map_blocks() to get a block mapping for an extent-based
|
||||
* file rather than ext4_ext_walk_space() because we can introduce
|
||||
* SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
|
||||
* function. When extent status tree has been fully implemented, it will
|
||||
* track all extent status for a file and we can directly use it to
|
||||
* retrieve the offset for SEEK_DATA/SEEK_HOLE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
|
||||
* lookup page cache to check whether or not there has some data between
|
||||
* [startoff, endoff] because, if this range contains an unwritten extent,
|
||||
* we determine this extent as a data or a hole according to whether the
|
||||
* page cache has data or not.
|
||||
*/
|
||||
static int ext4_find_unwritten_pgoff(struct inode *inode,
|
||||
int whence,
|
||||
ext4_lblk_t end_blk,
|
||||
loff_t *offset)
|
||||
{
|
||||
struct pagevec pvec;
|
||||
unsigned int blkbits;
|
||||
pgoff_t index;
|
||||
pgoff_t end;
|
||||
loff_t endoff;
|
||||
loff_t startoff;
|
||||
loff_t lastoff;
|
||||
int found = 0;
|
||||
|
||||
blkbits = inode->i_sb->s_blocksize_bits;
|
||||
startoff = *offset;
|
||||
lastoff = startoff;
|
||||
endoff = (loff_t)end_blk << blkbits;
|
||||
|
||||
index = startoff >> PAGE_SHIFT;
|
||||
end = (endoff - 1) >> PAGE_SHIFT;
|
||||
|
||||
pagevec_init(&pvec, 0);
|
||||
do {
|
||||
int i;
|
||||
unsigned long nr_pages;
|
||||
|
||||
nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
|
||||
&index, end);
|
||||
if (nr_pages == 0)
|
||||
break;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
struct buffer_head *bh, *head;
|
||||
|
||||
/*
|
||||
* If current offset is smaller than the page offset,
|
||||
* there is a hole at this offset.
|
||||
*/
|
||||
if (whence == SEEK_HOLE && lastoff < endoff &&
|
||||
lastoff < page_offset(pvec.pages[i])) {
|
||||
found = 1;
|
||||
*offset = lastoff;
|
||||
goto out;
|
||||
}
|
||||
|
||||
lock_page(page);
|
||||
|
||||
if (unlikely(page->mapping != inode->i_mapping)) {
|
||||
unlock_page(page);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!page_has_buffers(page)) {
|
||||
unlock_page(page);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (page_has_buffers(page)) {
|
||||
lastoff = page_offset(page);
|
||||
bh = head = page_buffers(page);
|
||||
do {
|
||||
if (lastoff + bh->b_size <= startoff)
|
||||
goto next;
|
||||
if (buffer_uptodate(bh) ||
|
||||
buffer_unwritten(bh)) {
|
||||
if (whence == SEEK_DATA)
|
||||
found = 1;
|
||||
} else {
|
||||
if (whence == SEEK_HOLE)
|
||||
found = 1;
|
||||
}
|
||||
if (found) {
|
||||
*offset = max_t(loff_t,
|
||||
startoff, lastoff);
|
||||
unlock_page(page);
|
||||
goto out;
|
||||
}
|
||||
next:
|
||||
lastoff += bh->b_size;
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
}
|
||||
|
||||
lastoff = page_offset(page) + PAGE_SIZE;
|
||||
unlock_page(page);
|
||||
}
|
||||
|
||||
pagevec_release(&pvec);
|
||||
} while (index <= end);
|
||||
|
||||
/* There are no pages upto endoff - that would be a hole in there. */
|
||||
if (whence == SEEK_HOLE && lastoff < endoff) {
|
||||
found = 1;
|
||||
*offset = lastoff;
|
||||
}
|
||||
out:
|
||||
pagevec_release(&pvec);
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_seek_data() retrieves the offset for SEEK_DATA.
|
||||
*/
|
||||
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct extent_status es;
|
||||
ext4_lblk_t start, last, end;
|
||||
loff_t dataoff, isize;
|
||||
int blkbits;
|
||||
int ret;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (offset < 0 || offset >= isize) {
|
||||
inode_unlock(inode);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
blkbits = inode->i_sb->s_blocksize_bits;
|
||||
start = offset >> blkbits;
|
||||
last = start;
|
||||
end = isize >> blkbits;
|
||||
dataoff = offset;
|
||||
|
||||
do {
|
||||
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
|
||||
if (ret <= 0) {
|
||||
/* No extent found -> no data */
|
||||
if (ret == 0)
|
||||
ret = -ENXIO;
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
last = es.es_lblk;
|
||||
if (last != start)
|
||||
dataoff = (loff_t)last << blkbits;
|
||||
if (!ext4_es_is_unwritten(&es))
|
||||
break;
|
||||
|
||||
/*
|
||||
* If there is a unwritten extent at this offset,
|
||||
* it will be as a data or a hole according to page
|
||||
* cache that has data or not.
|
||||
*/
|
||||
if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
|
||||
es.es_lblk + es.es_len, &dataoff))
|
||||
break;
|
||||
last += es.es_len;
|
||||
dataoff = (loff_t)last << blkbits;
|
||||
cond_resched();
|
||||
} while (last <= end);
|
||||
|
||||
inode_unlock(inode);
|
||||
|
||||
if (dataoff > isize)
|
||||
return -ENXIO;
|
||||
|
||||
return vfs_setpos(file, dataoff, maxsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_seek_hole() retrieves the offset for SEEK_HOLE.
|
||||
*/
|
||||
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct extent_status es;
|
||||
ext4_lblk_t start, last, end;
|
||||
loff_t holeoff, isize;
|
||||
int blkbits;
|
||||
int ret;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (offset < 0 || offset >= isize) {
|
||||
inode_unlock(inode);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
blkbits = inode->i_sb->s_blocksize_bits;
|
||||
start = offset >> blkbits;
|
||||
last = start;
|
||||
end = isize >> blkbits;
|
||||
holeoff = offset;
|
||||
|
||||
do {
|
||||
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
|
||||
if (ret < 0) {
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
}
|
||||
/* Found a hole? */
|
||||
if (ret == 0 || es.es_lblk > last) {
|
||||
if (last != start)
|
||||
holeoff = (loff_t)last << blkbits;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If there is a unwritten extent at this offset,
|
||||
* it will be as a data or a hole according to page
|
||||
* cache that has data or not.
|
||||
*/
|
||||
if (ext4_es_is_unwritten(&es) &&
|
||||
ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
|
||||
last + es.es_len, &holeoff))
|
||||
break;
|
||||
|
||||
last += es.es_len;
|
||||
holeoff = (loff_t)last << blkbits;
|
||||
cond_resched();
|
||||
} while (last <= end);
|
||||
|
||||
inode_unlock(inode);
|
||||
|
||||
if (holeoff > isize)
|
||||
holeoff = isize;
|
||||
|
||||
return vfs_setpos(file, holeoff, maxsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
|
||||
* by calling generic_file_llseek_size() with the appropriate maxbytes
|
||||
|
@ -695,18 +454,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
|
|||
maxbytes = inode->i_sb->s_maxbytes;
|
||||
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
case SEEK_CUR:
|
||||
case SEEK_END:
|
||||
default:
|
||||
return generic_file_llseek_size(file, offset, whence,
|
||||
maxbytes, i_size_read(inode));
|
||||
case SEEK_DATA:
|
||||
return ext4_seek_data(file, offset, maxbytes);
|
||||
case SEEK_HOLE:
|
||||
return ext4_seek_hole(file, offset, maxbytes);
|
||||
inode_lock_shared(inode);
|
||||
offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
|
||||
inode_unlock_shared(inode);
|
||||
break;
|
||||
case SEEK_DATA:
|
||||
inode_lock_shared(inode);
|
||||
offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
|
||||
inode_unlock_shared(inode);
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
if (offset < 0)
|
||||
return offset;
|
||||
return vfs_setpos(file, offset, maxbytes);
|
||||
}
|
||||
|
||||
const struct file_operations ext4_file_operations = {
|
||||
|
|
109
fs/ext4/inode.c
109
fs/ext4/inode.c
|
@ -3393,7 +3393,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
|
|||
return try_to_free_buffers(page);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
||||
unsigned flags, struct iomap *iomap)
|
||||
{
|
||||
|
@ -3402,6 +3401,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|||
unsigned long first_block = offset >> blkbits;
|
||||
unsigned long last_block = (offset + length - 1) >> blkbits;
|
||||
struct ext4_map_blocks map;
|
||||
bool delalloc = false;
|
||||
int ret;
|
||||
|
||||
|
||||
|
@ -3422,9 +3422,33 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
|
|||
map.m_lblk = first_block;
|
||||
map.m_len = last_block - first_block + 1;
|
||||
|
||||
if (!(flags & IOMAP_WRITE)) {
|
||||
if (flags & IOMAP_REPORT) {
|
||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||
} else {
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret == 0) {
|
||||
ext4_lblk_t end = map.m_lblk + map.m_len - 1;
|
||||
struct extent_status es;
|
||||
|
||||
ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
|
||||
|
||||
if (!es.es_len || es.es_lblk > end) {
|
||||
/* entire range is a hole */
|
||||
} else if (es.es_lblk > map.m_lblk) {
|
||||
/* range starts with a hole */
|
||||
map.m_len = es.es_lblk - map.m_lblk;
|
||||
} else {
|
||||
ext4_lblk_t offs = 0;
|
||||
|
||||
if (es.es_lblk < map.m_lblk)
|
||||
offs = map.m_lblk - es.es_lblk;
|
||||
map.m_lblk = es.es_lblk + offs;
|
||||
map.m_len = es.es_len - offs;
|
||||
delalloc = true;
|
||||
}
|
||||
}
|
||||
} else if (flags & IOMAP_WRITE) {
|
||||
int dio_credits;
|
||||
handle_t *handle;
|
||||
int retries = 0;
|
||||
|
@ -3475,17 +3499,21 @@ retry:
|
|||
}
|
||||
}
|
||||
ext4_journal_stop(handle);
|
||||
} else {
|
||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
iomap->flags = 0;
|
||||
iomap->bdev = inode->i_sb->s_bdev;
|
||||
iomap->dax_dev = sbi->s_daxdev;
|
||||
iomap->offset = first_block << blkbits;
|
||||
iomap->length = (u64)map.m_len << blkbits;
|
||||
|
||||
if (ret == 0) {
|
||||
iomap->type = IOMAP_HOLE;
|
||||
iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
|
||||
iomap->addr = IOMAP_NULL_ADDR;
|
||||
iomap->length = (u64)map.m_len << blkbits;
|
||||
} else {
|
||||
if (map.m_flags & EXT4_MAP_MAPPED) {
|
||||
iomap->type = IOMAP_MAPPED;
|
||||
|
@ -3496,11 +3524,11 @@ retry:
|
|||
return -EIO;
|
||||
}
|
||||
iomap->addr = (u64)map.m_pblk << blkbits;
|
||||
iomap->length = (u64)map.m_len << blkbits;
|
||||
}
|
||||
|
||||
if (map.m_flags & EXT4_MAP_NEW)
|
||||
iomap->flags |= IOMAP_F_NEW;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3561,8 +3589,6 @@ const struct iomap_ops ext4_iomap_ops = {
|
|||
.iomap_end = ext4_iomap_end,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||
ssize_t size, void *private)
|
||||
{
|
||||
|
@ -6118,70 +6144,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
|
|||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the first extent at or after @lblk in an inode that is not a hole.
|
||||
* Search for @map_len blocks at most. The extent is returned in @result.
|
||||
*
|
||||
* The function returns 1 if we found an extent. The function returns 0 in
|
||||
* case there is no extent at or after @lblk and in that case also sets
|
||||
* @result->es_len to 0. In case of error, the error code is returned.
|
||||
*/
|
||||
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||
unsigned int map_len, struct extent_status *result)
|
||||
{
|
||||
struct ext4_map_blocks map;
|
||||
struct extent_status es = {};
|
||||
int ret;
|
||||
|
||||
map.m_lblk = lblk;
|
||||
map.m_len = map_len;
|
||||
|
||||
/*
|
||||
* For non-extent based files this loop may iterate several times since
|
||||
* we do not determine full hole size.
|
||||
*/
|
||||
while (map.m_len > 0) {
|
||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/* There's extent covering m_lblk? Just return it. */
|
||||
if (ret > 0) {
|
||||
int status;
|
||||
|
||||
ext4_es_store_pblock(result, map.m_pblk);
|
||||
result->es_lblk = map.m_lblk;
|
||||
result->es_len = map.m_len;
|
||||
if (map.m_flags & EXT4_MAP_UNWRITTEN)
|
||||
status = EXTENT_STATUS_UNWRITTEN;
|
||||
else
|
||||
status = EXTENT_STATUS_WRITTEN;
|
||||
ext4_es_store_status(result, status);
|
||||
return 1;
|
||||
}
|
||||
ext4_es_find_delayed_extent_range(inode, map.m_lblk,
|
||||
map.m_lblk + map.m_len - 1,
|
||||
&es);
|
||||
/* Is delalloc data before next block in extent tree? */
|
||||
if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
|
||||
ext4_lblk_t offset = 0;
|
||||
|
||||
if (es.es_lblk < lblk)
|
||||
offset = lblk - es.es_lblk;
|
||||
result->es_lblk = es.es_lblk + offset;
|
||||
ext4_es_store_pblock(result,
|
||||
ext4_es_pblock(&es) + offset);
|
||||
result->es_len = es.es_len - offset;
|
||||
ext4_es_store_status(result, ext4_es_status(&es));
|
||||
|
||||
return 1;
|
||||
}
|
||||
/* There's a hole at m_lblk, advance us after it */
|
||||
map.m_lblk += map.m_len;
|
||||
map_len -= map.m_len;
|
||||
map.m_len = map_len;
|
||||
cond_resched();
|
||||
}
|
||||
result->es_len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue