diff options
Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r-- | fs/erofs/zdata.c | 283 |
1 files changed, 119 insertions, 164 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5f1890e309c6..036f610e044b 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -143,22 +143,17 @@ static inline void z_erofs_onlinepage_split(struct page *page) atomic_inc((atomic_t *)&page->private); } -static inline void z_erofs_page_mark_eio(struct page *page) +static void z_erofs_onlinepage_endio(struct page *page, int err) { - int orig; + int orig, v; + + DBG_BUGON(!PagePrivate(page)); do { orig = atomic_read((atomic_t *)&page->private); - } while (atomic_cmpxchg((atomic_t *)&page->private, orig, - orig | Z_EROFS_PAGE_EIO) != orig); -} - -static inline void z_erofs_onlinepage_endio(struct page *page) -{ - unsigned int v; + v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0); + } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); - DBG_BUGON(!PagePrivate(page)); - v = atomic_dec_return((atomic_t *)&page->private); if (!(v & ~Z_EROFS_PAGE_EIO)) { set_page_private(page, 0); ClearPagePrivate(page); @@ -507,19 +502,17 @@ enum z_erofs_pclustermode { */ Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE, /* - * The current collection has been linked with the owned chain, and - * could also be linked with the remaining collections, which means - * if the processing page is the tail page of the collection, thus - * the current collection can safely use the whole page (since - * the previous collection is under control) for in-place I/O, as - * illustrated below: - * ________________________________________________________________ - * | tail (partial) page | head (partial) page | - * | (of the current cl) | (of the previous collection) | - * | | | - * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________| + * The pcluster was just linked to a decompression chain by us. It can + * also be linked with the remaining pclusters, which means if the + * processing page is the tail page of a pcluster, this pcluster can + * safely use the whole page (since the previous pcluster is within the + * same chain) for in-place I/O, as illustrated below: + * ___________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current pcl) | (of the previous pcl) | + * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____| * - * [ (*) the above page can be used as inplace I/O. ] + * [ (*) the page above can be used as inplace I/O. ] */ Z_EROFS_PCLUSTER_FOLLOWED, }; @@ -535,8 +528,6 @@ struct z_erofs_decompress_frontend { z_erofs_next_pcluster_t owned_head; enum z_erofs_pclustermode mode; - /* used for applying cache strategy on the fly */ - bool backmost; erofs_off_t headoffset; /* a pointer used to pick up inplace I/O pages */ @@ -545,7 +536,7 @@ struct z_erofs_decompress_frontend { #define DECOMPRESS_FRONTEND_INIT(__i) { \ .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ - .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true } + .mode = Z_EROFS_PCLUSTER_FOLLOWED } static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) { @@ -554,7 +545,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED) return false; - if (fe->backmost) + if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED)) return true; if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND && @@ -851,9 +842,11 @@ err_out: return err; } -static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) +static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) { struct erofs_map_blocks *map = &fe->map; + struct super_block *sb = fe->inode->i_sb; + erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); struct erofs_workgroup *grp = NULL; int ret; @@ -863,8 +856,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); if (!(map->m_flags & EROFS_MAP_META)) { - grp = erofs_find_workgroup(fe->inode->i_sb, - map->m_pa >> PAGE_SHIFT); + grp = erofs_find_workgroup(sb, blknr); } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { DBG_BUGON(1); return -EFSCORRUPTED; @@ -883,9 +875,26 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe) } else if (ret) { return ret; } + z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset, Z_EROFS_INLINE_BVECS, fe->pcl->vcnt); - /* since file-backed online pages are traversed in reverse order */ + if (!z_erofs_is_inline_pcluster(fe->pcl)) { + /* bind cache first when cached decompression is preferred */ + z_erofs_bind_cache(fe); + } else { + void *mptr; + + mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP); + if (IS_ERR(mptr)) { + ret = PTR_ERR(mptr); + erofs_err(sb, "failed to get inline data %d", ret); + return ret; + } + get_page(map->buf.page); + WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); + fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; + } + /* file-backed inplace I/O pages are traversed in reverse order */ fe->icur = z_erofs_pclusterpages(fe->pcl); return 0; } @@ -908,12 +917,12 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) call_rcu(&pcl->rcu, z_erofs_rcu_callback); } -static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) +static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) { struct z_erofs_pcluster *pcl = fe->pcl; if (!pcl) - return false; + return; z_erofs_bvec_iter_end(&fe->biter); mutex_unlock(&pcl->lock); @@ -929,37 +938,29 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) erofs_workgroup_put(&pcl->obj); fe->pcl = NULL; - return true; } -static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, - struct page *page, unsigned int pageofs, - unsigned int len) +static int z_erofs_read_fragment(struct super_block *sb, struct page *page, + unsigned int cur, unsigned int end, erofs_off_t pos) { - struct super_block *sb = inode->i_sb; - struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode; + struct inode *packed_inode = EROFS_SB(sb)->packed_inode; struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - u8 *src, *dst; - unsigned int i, cnt; + unsigned int cnt; + u8 *src; if (!packed_inode) return -EFSCORRUPTED; buf.inode = packed_inode; - pos += EROFS_I(inode)->z_fragmentoff; - for (i = 0; i < len; i += cnt) { - cnt = min_t(unsigned int, len - i, + for (; cur < end; cur += cnt, pos += cnt) { + cnt = min_t(unsigned int, end - cur, sb->s_blocksize - erofs_blkoff(sb, pos)); src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP); if (IS_ERR(src)) { erofs_put_metabuf(&buf); return PTR_ERR(src); } - - dst = kmap_local_page(page); - memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt); - kunmap_local(dst); - pos += cnt; + memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt); } erofs_put_metabuf(&buf); return 0; @@ -972,94 +973,60 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct erofs_map_blocks *const map = &fe->map; const loff_t offset = page_offset(page); bool tight = true, exclusive; - unsigned int cur, end, spiltted; + unsigned int cur, end, len, split; int err = 0; - /* register locked file pages as online pages in pack */ z_erofs_onlinepage_init(page); - spiltted = 0; + split = 0; end = PAGE_SIZE; repeat: - cur = end - 1; - - if (offset + cur < map->m_la || - offset + cur >= map->m_la + map->m_llen) { - if (z_erofs_collector_end(fe)) - fe->backmost = false; - map->m_la = offset + cur; + if (offset + end - 1 < map->m_la || + offset + end - 1 >= map->m_la + map->m_llen) { + z_erofs_pcluster_end(fe); + map->m_la = offset + end - 1; map->m_llen = 0; err = z_erofs_map_blocks_iter(inode, map, 0); if (err) goto out; - } else { - if (fe->pcl) - goto hitted; - /* didn't get a valid pcluster previously (very rare) */ } - if (!(map->m_flags & EROFS_MAP_MAPPED) || - map->m_flags & EROFS_MAP_FRAGMENT) - goto hitted; - - err = z_erofs_collector_begin(fe); - if (err) - goto out; + cur = offset > map->m_la ? 0 : map->m_la - offset; + /* bump split parts first to avoid several separate cases */ + ++split; - if (z_erofs_is_inline_pcluster(fe->pcl)) { - void *mp; - - mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, - erofs_blknr(inode->i_sb, map->m_pa), - EROFS_NO_KMAP); - if (IS_ERR(mp)) { - err = PTR_ERR(mp); - erofs_err(inode->i_sb, - "failed to get inline page, err %d", err); - goto out; - } - get_page(fe->map.buf.page); - WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, - fe->map.buf.page); - fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; - } else { - /* bind cache first when cached decompression is preferred */ - z_erofs_bind_cache(fe); - } -hitted: - /* - * Ensure the current partial page belongs to this submit chain rather - * than other concurrent submit chains or the noio(bypass) chain since - * those chains are handled asynchronously thus the page cannot be used - * for inplace I/O or bvpage (should be processed in a strict order.) - */ - tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - - cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); + tight = false; goto next_part; } + if (map->m_flags & EROFS_MAP_FRAGMENT) { - unsigned int pageofs, skip, len; + erofs_off_t fpos = offset + cur - map->m_la; - if (offset > map->m_la) { - pageofs = 0; - skip = offset - map->m_la; - } else { - pageofs = map->m_la & ~PAGE_MASK; - skip = 0; - } - len = min_t(unsigned int, map->m_llen - skip, end - cur); - err = z_erofs_read_fragment(inode, skip, page, pageofs, len); + len = min_t(unsigned int, map->m_llen - fpos, end - cur); + err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, + EROFS_I(inode)->z_fragmentoff + fpos); if (err) goto out; - ++spiltted; tight = false; goto next_part; } - exclusive = (!cur && (!spiltted || tight)); + if (!fe->pcl) { + err = z_erofs_pcluster_begin(fe); + if (err) + goto out; + } + + /* + * Ensure the current partial page belongs to this submit chain rather + * than other concurrent submit chains or the noio(bypass) chain since + * those chains are handled asynchronously thus the page cannot be used + * for inplace I/O or bvpage (should be processed in a strict order.) + */ + tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); + exclusive = (!cur && ((split <= 1) || tight)); if (cur) tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); @@ -1072,8 +1039,6 @@ hitted: goto out; z_erofs_onlinepage_split(page); - /* bump up the number of spiltted parts of a page */ - ++spiltted; if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; if (fe->pcl->length < offset + end - map->m_la) { @@ -1094,9 +1059,7 @@ next_part: goto repeat; out: - if (err) - z_erofs_page_mark_eio(page); - z_erofs_onlinepage_endio(page); + z_erofs_onlinepage_endio(page, err); return err; } @@ -1144,10 +1107,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, struct z_erofs_bvec *bvec) { struct z_erofs_bvec_item *item; + unsigned int pgnr; - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) { - unsigned int pgnr; - + if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && + (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; DBG_BUGON(pgnr >= be->nr_pages); if (!be->decompressed_pages[pgnr]) { @@ -1198,9 +1162,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, cur += len; } kunmap_local(dst); - if (err) - z_erofs_page_mark_eio(bvi->bvec.page); - z_erofs_onlinepage_endio(bvi->bvec.page); + z_erofs_onlinepage_endio(bvi->bvec.page, err); list_del(p); kfree(bvi); } @@ -1371,9 +1333,7 @@ out: /* recycle all individual short-lived pages */ if (z_erofs_put_shortlivedpage(be->pagepool, page)) continue; - if (err) - z_erofs_page_mark_eio(page); - z_erofs_onlinepage_endio(page); + z_erofs_onlinepage_endio(page, err); } if (be->decompressed_pages != be->onstack_pages) @@ -1409,7 +1369,10 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, owned = READ_ONCE(be.pcl->next); z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0); - erofs_workgroup_put(&be.pcl->obj); + if (z_erofs_is_inline_pcluster(be.pcl)) + z_erofs_free_pcluster(be.pcl); + else + erofs_workgroup_put(&be.pcl->obj); } } @@ -1841,21 +1804,16 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, } cur = map->m_la + map->m_llen - 1; - while (cur >= end) { + while ((cur >= end) && (cur < i_size_read(inode))) { pgoff_t index = cur >> PAGE_SHIFT; struct page *page; page = erofs_grab_cache_page_nowait(inode->i_mapping, index); if (page) { - if (PageUptodate(page)) { + if (PageUptodate(page)) unlock_page(page); - } else { - err = z_erofs_do_read_page(f, page); - if (err) - erofs_err(inode->i_sb, - "readmore error at page %lu @ nid %llu", - index, EROFS_I(inode)->nid); - } + else + (void)z_erofs_do_read_page(f, page); put_page(page); } @@ -1867,25 +1825,25 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, static int z_erofs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *const inode = page->mapping->host; + struct inode *const inode = folio->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); int err; - trace_erofs_readpage(page, false); - f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; + trace_erofs_read_folio(folio, false); + f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, page); + err = z_erofs_do_read_page(&f, &folio->page); z_erofs_pcluster_readmore(&f, NULL, false); - (void)z_erofs_collector_end(&f); + z_erofs_pcluster_end(&f); /* if some compressed cluster ready, need submit them anyway */ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false); - if (err) - erofs_err(inode->i_sb, "failed to read, err [%d]", err); + if (err && err != -EINTR) + erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu", + err, folio->index, EROFS_I(inode)->nid); erofs_put_metabuf(&f.map.buf); erofs_release_pages(&f.pagepool); @@ -1897,38 +1855,35 @@ static void z_erofs_readahead(struct readahead_control *rac) struct inode *const inode = rac->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - struct page *head = NULL, *page; - unsigned int nr_pages; + struct folio *head = NULL, *folio; + unsigned int nr_folios; + int err; f.headoffset = readahead_pos(rac); z_erofs_pcluster_readmore(&f, rac, true); - nr_pages = readahead_count(rac); - trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false); + nr_folios = readahead_count(rac); + trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false); - while ((page = readahead_page(rac))) { - set_page_private(page, (unsigned long)head); - head = page; + while ((folio = readahead_folio(rac))) { + folio->private = head; + head = folio; } + /* traverse in reverse order for best metadata I/O performance */ while (head) { - struct page *page = head; - int err; - - /* traversal in reverse order */ - head = (void *)page_private(page); + folio = head; + head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, page); - if (err) - erofs_err(inode->i_sb, - "readahead error at page %lu @ nid %llu", - page->index, EROFS_I(inode)->nid); - put_page(page); + err = z_erofs_do_read_page(&f, &folio->page); + if (err && err != -EINTR) + erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", + folio->index, EROFS_I(inode)->nid); } z_erofs_pcluster_readmore(&f, rac, false); - (void)z_erofs_collector_end(&f); + z_erofs_pcluster_end(&f); - z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true); + z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true); erofs_put_metabuf(&f.map.buf); erofs_release_pages(&f.pagepool); } |