Re: git: cd25b0f740f8 - main - zfs: cherry-pick fix from openzfs
Date: Thu, 10 Aug 2023 14:23:50 UTC
On 8/10/23, Martin Matuska <mm@freebsd.org> wrote: > The branch main has been updated by mm: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=cd25b0f740f8c846562fd66e7380437eb898c875 > > commit cd25b0f740f8c846562fd66e7380437eb898c875 > Author: Martin Matuska <mm@FreeBSD.org> > AuthorDate: 2023-08-10 07:55:42 +0000 > Commit: Martin Matuska <mm@FreeBSD.org> > CommitDate: 2023-08-10 07:56:53 +0000 > > zfs: cherry-pick fix from openzfs > > Vendor PR: > #15080 ZIL: Fix config lock deadlock > > Obtained from: OpenZFS > OpenZFS commit: 2cb992a99ccadb78d97049b40bd442eb4fdc549d > > Note: full vendor imports will continue when stable/14 has been > branched First a stylistic issue: Can you please use upstream one-liner, maybe prefixed with zfs:. For this commit it would be: zfs: ZIL: Fix config lock deadlock. then copy their commit message For example see https://cgit.freebsd.org/src/commit/?id=d09a955a605d03471c5ab7bd17b8a6186fdc148c A not stylistic issue is the delay between upstream fixes and them getting merged into FreeBSD. For example the commit at hand is over 2 weeks old and I presume this merge got only prompted by des@ running a zfs-related deadlock. We really should be getting timely updates without local people running into them. > --- > sys/contrib/openzfs/module/zfs/zil.c | 34 > +++++++++++++++++++++++++++------- > 1 file changed, 27 insertions(+), 7 deletions(-) > > diff --git a/sys/contrib/openzfs/module/zfs/zil.c > b/sys/contrib/openzfs/module/zfs/zil.c > index 00d66a2481d7..af7137faaccf 100644 > --- a/sys/contrib/openzfs/module/zfs/zil.c > +++ b/sys/contrib/openzfs/module/zfs/zil.c > @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache; > static kmem_cache_t *zil_zcw_cache; > > static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); > +static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb); > static itx_t *zil_itx_clone(itx_t *oitx); > > static int > @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = > SPA_OLD_MAXBLOCKSIZE; > * Has to be called under zl_issuer_lock to chain more lwbs. > */ > static lwb_t * > -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) > +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) > { > lwb_t *nlwb = NULL; > zil_chain_t *zilc; > @@ -1870,6 +1871,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) > > dmu_tx_commit(tx); > > + /* > + * We need to acquire the config lock for the lwb to issue it later. > + * However, if we already have a queue of closed parent lwbs already > + * holding the config lock (but not yet issued), we can't block here > + * waiting on the lock or we will deadlock. In that case we must > + * first issue to parent IOs before waiting on the lock. > + */ > + if (ilwbs && !list_is_empty(ilwbs)) { > + if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) { > + lwb_t *tlwb; > + while ((tlwb = list_remove_head(ilwbs)) != NULL) > + zil_lwb_write_issue(zilog, tlwb); > + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); > + } > + } else { > + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); > + } > + > + if (ilwbs) > + list_insert_tail(ilwbs, lwb); > + > /* > * If there was an allocation failure then nlwb will be null which > * forces a txg_wait_synced(). > @@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) > ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc, > BP_GET_LSIZE(&lwb->lwb_blk)); > } > - spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER); > + ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER)); > zil_lwb_add_block(lwb, &lwb->lwb_blk); > lwb->lwb_issued_timestamp = gethrtime(); > zio_nowait(lwb->lwb_root_zio); > @@ -2037,8 +2059,7 @@ cont: > lwb_sp < zil_max_waste_space(zilog) && > (dlen % max_log_data == 0 || > lwb_sp < reclen + dlen % max_log_data))) { > - list_insert_tail(ilwbs, lwb); > - lwb = zil_lwb_write_close(zilog, lwb); > + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); > if (lwb == NULL) > return (NULL); > zil_lwb_write_open(zilog, lwb); > @@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, > zil_commit_waiter_t *zcw, list_t *ilwbs) > zfs_commit_timeout_pct / 100; > if (sleep < zil_min_commit_timeout || > lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { > - list_insert_tail(ilwbs, lwb); > - lwb = zil_lwb_write_close(zilog, lwb); > + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); > zilog->zl_cur_used = 0; > if (lwb == NULL) { > while ((lwb = list_remove_head(ilwbs)) > @@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, > zil_commit_waiter_t *zcw) > * since we've reached the commit waiter's timeout and it still > * hasn't been issued. > */ > - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); > + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL); > > ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); > > -- Mateusz Guzik <mjguzik gmail.com>