Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r--	fs/erofs/zdata.c	225
1 files changed, 98 insertions, 127 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 424f656cd765..a569ff9dfd04 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -122,42 +122,6 @@ static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
 	return fo->mapping == MNGD_MAPPING(sbi);
 }
 
-/*
- * bit 30: I/O error occurred on this folio
- * bit 0 - 29: remaining parts to complete this folio
- */
-#define Z_EROFS_FOLIO_EIO		(1 << 30)
-
-static void z_erofs_onlinefolio_init(struct folio *folio)
-{
-	union {
-		atomic_t o;
-		void *v;
-	} u = { .o = ATOMIC_INIT(1) };
-
-	folio->private = u.v;	/* valid only if file-backed folio is locked */
-}
-
-static void z_erofs_onlinefolio_split(struct folio *folio)
-{
-	atomic_inc((atomic_t *)&folio->private);
-}
-
-static void z_erofs_onlinefolio_end(struct folio *folio, int err)
-{
-	int orig, v;
-
-	do {
-		orig = atomic_read((atomic_t *)&folio->private);
-		v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0);
-	} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
-
-	if (v & ~Z_EROFS_FOLIO_EIO)
-		return;
-	folio->private = 0;
-	folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO));
-}
-
 #define Z_EROFS_ONSTACK_PAGES		32
 
 /*
@@ -232,7 +196,8 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
 		struct page *nextpage = *candidate_bvpage;
 
 		if (!nextpage) {
-			nextpage = erofs_allocpage(pagepool, GFP_KERNEL);
+			nextpage = __erofs_allocpage(pagepool, GFP_KERNEL,
+						     true);
 			if (!nextpage)
 				return -ENOMEM;
 			set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
@@ -745,24 +710,6 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
 	return ret;
 }
 
-static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
-{
-	struct z_erofs_pcluster *pcl = f->pcl;
-	z_erofs_next_pcluster_t *owned_head = &f->owned_head;
-
-	/* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */
-	if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
-		    *owned_head) == Z_EROFS_PCLUSTER_NIL) {
-		*owned_head = &pcl->next;
-		/* so we can attach this pcluster to our submission chain. */
-		f->mode = Z_EROFS_PCLUSTER_FOLLOWED;
-		return;
-	}
-
-	/* type 2, it belongs to an ongoing chain */
-	f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
-}
-
 static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
 {
 	struct erofs_map_blocks *map = &fe->map;
@@ -838,7 +785,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 	int ret;
 
 	DBG_BUGON(fe->pcl);
-
 	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
 	DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
 
@@ -858,7 +804,15 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 
 	if (ret == -EEXIST) {
 		mutex_lock(&fe->pcl->lock);
-		z_erofs_try_to_claim_pcluster(fe);
+		/* check if this pcluster hasn't been linked into any chain. */
+		if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL,
+			    fe->owned_head) == Z_EROFS_PCLUSTER_NIL) {
+			/* .. so it can be attached to our submission chain */
+			fe->owned_head = &fe->pcl->next;
+			fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
+		} else {	/* otherwise, it belongs to an inflight chain */
+			fe->mode = Z_EROFS_PCLUSTER_INFLIGHT;
+		}
 	} else if (ret) {
 		return ret;
 	}
@@ -965,7 +919,7 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
 	int err = 0;
 
 	tight = (bs == PAGE_SIZE);
-	z_erofs_onlinefolio_init(folio);
+	erofs_onlinefolio_init(folio);
 	do {
 		if (offset + end - 1 < map->m_la ||
 		    offset + end - 1 >= map->m_la + map->m_llen) {
@@ -1024,7 +978,7 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
 		if (err)
 			break;
 
-		z_erofs_onlinefolio_split(folio);
+		erofs_onlinefolio_split(folio);
 		if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
 			f->pcl->multibases = true;
 		if (f->pcl->length < offset + end - map->m_la) {
@@ -1044,7 +998,7 @@ static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
 			tight = (bs == PAGE_SIZE);
 		}
 	} while ((end = cur) > 0);
-	z_erofs_onlinefolio_end(folio, err);
+	erofs_onlinefolio_end(folio, err);
 	return err;
 }
 
@@ -1147,7 +1101,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
 			cur += len;
 		}
 		kunmap_local(dst);
-		z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
+		erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
 		list_del(p);
 		kfree(bvi);
 	}
@@ -1190,9 +1144,10 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
 		struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
 		struct page *page = bvec->page;
 
-		/* compressed data ought to be valid before decompressing */
-		if (!page) {
-			err = -EIO;
+		/* compressed data ought to be valid when decompressing */
+		if (IS_ERR(page) || !page) {
+			bvec->page = NULL;	/* clear the failure reason */
+			err = page ? PTR_ERR(page) : -EIO;
 			continue;
 		}
 		be->compressed_pages[i] = page;
@@ -1268,8 +1223,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 					.inplace_io = overlapped,
 					.partial_decoding = pcl->partial,
 					.fillgaps = pcl->multibases,
-					.gfp = pcl->besteffort ?
-						GFP_KERNEL | __GFP_NOFAIL :
+					.gfp = pcl->besteffort ? GFP_KERNEL :
 						GFP_NOWAIT | __GFP_NORETRY
 				 }, be->pagepool);
 
@@ -1302,7 +1256,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 
 		DBG_BUGON(z_erofs_page_is_invalidated(page));
 		if (!z_erofs_is_shortlived_page(page)) {
-			z_erofs_onlinefolio_end(page_folio(page), err);
+			erofs_onlinefolio_end(page_folio(page), err);
 			continue;
 		}
 		if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
@@ -1333,8 +1287,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
 	return err;
 }
 
-static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
-				     struct page **pagepool)
+static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
+				    struct page **pagepool)
 {
 	struct z_erofs_decompress_backend be = {
 		.sb = io->sb,
@@ -1343,6 +1297,7 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
 			LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
 	};
 	z_erofs_next_pcluster_t owned = io->head;
+	int err = io->eio ? -EIO : 0;
 
 	while (owned != Z_EROFS_PCLUSTER_TAIL) {
 		DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
@@ -1350,12 +1305,13 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
 		be.pcl = container_of(owned, struct z_erofs_pcluster, next);
 		owned = READ_ONCE(be.pcl->next);
 
-		z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
+		err = z_erofs_decompress_pcluster(&be, err) ?: err;
 		if (z_erofs_is_inline_pcluster(be.pcl))
 			z_erofs_free_pcluster(be.pcl);
 		else
 			erofs_workgroup_put(&be.pcl->obj);
 	}
+	return err;
 }
 
 static void z_erofs_decompressqueue_work(struct work_struct *work)
@@ -1428,6 +1384,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	struct z_erofs_bvec zbv;
 	struct address_space *mapping;
 	struct folio *folio;
+	struct page *page;
 	int bs = i_blocksize(f->inode);
 
 	/* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1450,7 +1407,6 @@ repeat:
 	 * file-backed folios will be used instead.
 	 */
 	if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-		folio->private = 0;
 		tocache = true;
 		goto out_tocache;
 	}
@@ -1468,7 +1424,7 @@ repeat:
 	}
 
 	folio_lock(folio);
-	if (folio->mapping == mc) {
+	if (likely(folio->mapping == mc)) {
 		/*
 		 * The cached folio is still in managed cache but without
 		 * a valid `->private` pcluster hint. Let's reconnect them.
@@ -1478,41 +1434,48 @@ repeat:
 			/* compressed_bvecs[] already takes a ref before */
 			folio_put(folio);
 		}
-
-		/* no need to submit if it is already up-to-date */
-		if (folio_test_uptodate(folio)) {
-			folio_unlock(folio);
-			bvec->bv_page = NULL;
+		if (likely(folio->private == pcl)) {
+			/* don't submit cache I/Os again if already uptodate */
+			if (folio_test_uptodate(folio)) {
+				folio_unlock(folio);
+				bvec->bv_page = NULL;
+			}
+			return;
 		}
-		return;
+		/*
+		 * Already linked with another pcluster, which only appears in
+		 * crafted images by fuzzers for now. But handle this anyway.
		 */
+		tocache = false;	/* use temporary short-lived pages */
+	} else {
+		DBG_BUGON(1); /* referenced managed folios can't be truncated */
+		tocache = true;
 	}
-
-	/*
-	 * It has been truncated, so it's unsafe to reuse this one. Let's
-	 * allocate a new page for compressed data.
-	 */
-	DBG_BUGON(folio->mapping);
-	tocache = true;
 	folio_unlock(folio);
 	folio_put(folio);
 out_allocfolio:
-	zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	page = __erofs_allocpage(&f->pagepool, gfp, true);
 	spin_lock(&pcl->obj.lockref.lock);
-	if (pcl->compressed_bvecs[nr].page) {
-		erofs_pagepool_add(&f->pagepool, zbv.page);
+	if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
+		if (page)
+			erofs_pagepool_add(&f->pagepool, page);
 		spin_unlock(&pcl->obj.lockref.lock);
 		cond_resched();
 		goto repeat;
 	}
-	bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
-	folio = page_folio(zbv.page);
-	/* first mark it as a temporary shortlived folio (now 1 ref) */
-	folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+	pcl->compressed_bvecs[nr].page = page ? page : ERR_PTR(-ENOMEM);
 	spin_unlock(&pcl->obj.lockref.lock);
+	bvec->bv_page = page;
+	if (!page)
+		return;
+	folio = page_folio(page);
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
+	    filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
+		/* turn into a temporary shortlived folio (1 ref) */
+		folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 		return;
+	}
 	folio_attach_private(folio, pcl);
 	/* drop a refcount added by allocpage (then 2 refs in total here) */
 	folio_put(folio);
@@ -1647,17 +1610,16 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 		cur = mdev.m_pa;
 		end = cur + pcl->pclustersize;
 		do {
-			z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
-			if (!bvec.bv_page)
-				continue;
-
+			bvec.bv_page = NULL;
 			if (bio && (cur != last_pa ||
 				    bio->bi_bdev != mdev.m_bdev)) {
-io_retry:
-				if (!erofs_is_fscache_mode(sb))
-					submit_bio(bio);
-				else
+drain_io:
+				if (erofs_is_fileio_mode(EROFS_SB(sb)))
+					erofs_fileio_submit_bio(bio);
+				else if (erofs_is_fscache_mode(sb))
 					erofs_fscache_submit_bio(bio);
+				else
+					submit_bio(bio);
 
 				if (memstall) {
 					psi_memstall_leave(&pflags);
@@ -1666,6 +1628,15 @@ io_retry:
 				bio = NULL;
 			}
 
+			if (!bvec.bv_page) {
+				z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+				if (!bvec.bv_page)
+					continue;
+				if (cur + bvec.bv_len > end)
+					bvec.bv_len = end - cur;
+				DBG_BUGON(bvec.bv_len < sb->s_blocksize);
+			}
+
 			if (unlikely(PageWorkingset(bvec.bv_page)) &&
 			    !memstall) {
 				psi_memstall_enter(&pflags);
@@ -1673,10 +1644,13 @@ io_retry:
 			}
 
 			if (!bio) {
-				bio = erofs_is_fscache_mode(sb) ?
-					erofs_fscache_bio_alloc(&mdev) :
-					bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
-						  REQ_OP_READ, GFP_NOIO);
+				if (erofs_is_fileio_mode(EROFS_SB(sb)))
+					bio = erofs_fileio_bio_alloc(&mdev);
+				else if (erofs_is_fscache_mode(sb))
+					bio = erofs_fscache_bio_alloc(&mdev);
+				else
+					bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
+							REQ_OP_READ, GFP_NOIO);
 				bio->bi_end_io = z_erofs_endio;
 				bio->bi_iter.bi_sector = cur >> 9;
 				bio->bi_private = q[JQ_SUBMIT];
@@ -1685,13 +1659,9 @@ io_retry:
 				++nr_bios;
 			}
 
-			if (cur + bvec.bv_len > end)
-				bvec.bv_len = end - cur;
-			DBG_BUGON(bvec.bv_len < sb->s_blocksize);
 			if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
 					  bvec.bv_offset))
-				goto io_retry;
-
+				goto drain_io;
 			last_pa = cur + bvec.bv_len;
 			bypass = false;
 		} while ((cur += bvec.bv_len) < end);
@@ -1703,10 +1673,12 @@ io_retry:
 	} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
 
 	if (bio) {
-		if (!erofs_is_fscache_mode(sb))
-			submit_bio(bio);
-		else
+		if (erofs_is_fileio_mode(EROFS_SB(sb)))
+			erofs_fileio_submit_bio(bio);
+		else if (erofs_is_fscache_mode(sb))
 			erofs_fscache_submit_bio(bio);
+		else
+			submit_bio(bio);
 		if (memstall)
 			psi_memstall_leave(&pflags);
 	}
@@ -1722,26 +1694,28 @@ io_retry:
 	z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
 }
 
-static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
-			     bool force_fg, bool ra)
+static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
+			    unsigned int ra_folios)
 {
 	struct z_erofs_decompressqueue io[NR_JOBQUEUES];
+	struct erofs_sb_info *sbi = EROFS_I_SB(f->inode);
+	bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios);
+	int err;
 
 	if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		return;
-	z_erofs_submit_queue(f, io, &force_fg, ra);
+		return 0;
+	z_erofs_submit_queue(f, io, &force_fg, !!ra_folios);
 
 	/* handle bypass queue (no i/o pclusters) immediately */
-	z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
-
+	err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
 	if (!force_fg)
-		return;
+		return err;
 
 	/* wait until all bios are completed */
	wait_for_completion_io(&io[JQ_SUBMIT].u.done);
 
 	/* handle synchronous decompress queue in the caller context */
-	z_erofs_decompress_queue(&io[JQ_SUBMIT], &f->pagepool);
+	return z_erofs_decompress_queue(&io[JQ_SUBMIT], &f->pagepool) ?: err;
 }
 
 /*
@@ -1803,7 +1777,6 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 static int z_erofs_read_folio(struct file *file, struct folio *folio)
 {
 	struct inode *const inode = folio->mapping->host;
-	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
 	int err;
 
@@ -1815,9 +1788,8 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
 	z_erofs_pcluster_readmore(&f, NULL, false);
 	z_erofs_pcluster_end(&f);
 
-	/* if some compressed cluster ready, need submit them anyway */
-	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
-
+	/* if some pclusters are ready, need submit them anyway */
+	err = z_erofs_runqueue(&f, 0) ?: err;
 	if (err && err != -EINTR)
 		erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
 			  err, folio->index, EROFS_I(inode)->nid);
@@ -1830,7 +1802,6 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
 static void z_erofs_readahead(struct readahead_control *rac)
 {
 	struct inode *const inode = rac->mapping->host;
-	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
 	struct folio *head = NULL, *folio;
 	unsigned int nr_folios;
@@ -1860,7 +1831,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
 	z_erofs_pcluster_readmore(&f, rac, false);
 	z_erofs_pcluster_end(&f);
 
-	z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
+	(void)z_erofs_runqueue(&f, nr_folios);
 	erofs_put_metabuf(&f.map.buf);
 	erofs_release_pages(&f.pagepool);
 }
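
Side note: the z_erofs_onlinefolio_* helpers removed at the top of this diff (callers now use the shared erofs_onlinefolio_* names) implement a compact completion scheme: a single atomic word per folio serves as a remaining-parts counter in bits 0-29 with an I/O-error latch in bit 30, so the last completer both sees the count reach zero and reports the latched error exactly once. Below is a minimal stand-alone sketch of the same scheme using C11 atomics in place of the kernel's atomic_t; all names here are illustrative, not kernel API.

/*
 * Userspace model of the onlinefolio counter: bits 0-29 count
 * outstanding parts, bit 30 latches any error seen along the way.
 */
#include <stdatomic.h>
#include <stdio.h>

#define FOLIO_EIO	(1 << 30)

static atomic_int folio_private;	/* stands in for folio->private */

static void onlinefolio_init(void)
{
	atomic_store(&folio_private, 1);	/* one part for the submitter */
}

static void onlinefolio_split(void)
{
	atomic_fetch_add(&folio_private, 1);	/* one part per sub-request */
}

static void onlinefolio_end(int err)
{
	int orig, v;

	do {
		orig = atomic_load(&folio_private);
		v = (orig - 1) | (err ? FOLIO_EIO : 0);
	} while (!atomic_compare_exchange_weak(&folio_private, &orig, v));

	if (v & ~FOLIO_EIO)
		return;		/* other parts are still in flight */
	/* the kernel would call folio_end_read(folio, !(v & FOLIO_EIO)) here */
	printf("folio completed, uptodate=%d\n", !(v & FOLIO_EIO));
}

int main(void)
{
	onlinefolio_init();
	onlinefolio_split();	/* a sub-request covering part of the folio */
	onlinefolio_end(0);	/* the sub-request succeeds */
	onlinefolio_end(-5);	/* submitter drops its part with an error */
	return 0;
}

Running the sketch prints "folio completed, uptodate=0": the error latched by any part poisons the final completion, which fires only once, when the counter drops to zero.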