Re: git: cd25b0f740f8 - main - zfs: cherry-pick fix from openzfs
Date: Thu, 10 Aug 2023 15:55:17 UTC
Hi Mateusz, I am very sorry but we have to wait with full merges until 14-stable is branched. After branching: main will continue to receive updates from OpenZFS master branch stable/14 will be updated from OpenZFS zfs-2.2-release branch Until then everything we can do is cherry-pick commits that fix serious issues everything else will make the merge management unnecessarily complicated. Cheers, mm On 10. 8. 2023 16:23, Mateusz Guzik wrote: > On 8/10/23, Martin Matuska <mm@freebsd.org> wrote: >> The branch main has been updated by mm: >> >> URL: >> https://cgit.FreeBSD.org/src/commit/?id=cd25b0f740f8c846562fd66e7380437eb898c875 >> >> commit cd25b0f740f8c846562fd66e7380437eb898c875 >> Author: Martin Matuska <mm@FreeBSD.org> >> AuthorDate: 2023-08-10 07:55:42 +0000 >> Commit: Martin Matuska <mm@FreeBSD.org> >> CommitDate: 2023-08-10 07:56:53 +0000 >> >> zfs: cherry-pick fix from openzfs >> >> Vendor PR: >> #15080 ZIL: Fix config lock deadlock >> >> Obtained from: OpenZFS >> OpenZFS commit: 2cb992a99ccadb78d97049b40bd442eb4fdc549d >> >> Note: full vendor imports will continue when stable/14 has been >> branched > First a stylistic issue: > Can you please use upstream one-liner, maybe prefixed with zfs:. For > this commit it would be: > zfs: ZIL: Fix config lock deadlock. > > then copy their commit message > > For example see > https://cgit.freebsd.org/src/commit/?id=d09a955a605d03471c5ab7bd17b8a6186fdc148c > > A not stylistic issue is the delay between upstream fixes and them > getting merged into FreeBSD. For example the commit at hand is over 2 > weeks old and I presume this merge got only prompted by des@ running a > zfs-related deadlock. > > We really should be getting timely updates without local people > running into them. > >> --- >> sys/contrib/openzfs/module/zfs/zil.c | 34 >> +++++++++++++++++++++++++++------- >> 1 file changed, 27 insertions(+), 7 deletions(-) >> >> diff --git a/sys/contrib/openzfs/module/zfs/zil.c >> b/sys/contrib/openzfs/module/zfs/zil.c >> index 00d66a2481d7..af7137faaccf 100644 >> --- a/sys/contrib/openzfs/module/zfs/zil.c >> +++ b/sys/contrib/openzfs/module/zfs/zil.c >> @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache; >> static kmem_cache_t *zil_zcw_cache; >> >> static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); >> +static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb); >> static itx_t *zil_itx_clone(itx_t *oitx); >> >> static int >> @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = >> SPA_OLD_MAXBLOCKSIZE; >> * Has to be called under zl_issuer_lock to chain more lwbs. >> */ >> static lwb_t * >> -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) >> +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) >> { >> lwb_t *nlwb = NULL; >> zil_chain_t *zilc; >> @@ -1870,6 +1871,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) >> >> dmu_tx_commit(tx); >> >> + /* >> + * We need to acquire the config lock for the lwb to issue it later. >> + * However, if we already have a queue of closed parent lwbs already >> + * holding the config lock (but not yet issued), we can't block here >> + * waiting on the lock or we will deadlock. In that case we must >> + * first issue to parent IOs before waiting on the lock. >> + */ >> + if (ilwbs && !list_is_empty(ilwbs)) { >> + if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) { >> + lwb_t *tlwb; >> + while ((tlwb = list_remove_head(ilwbs)) != NULL) >> + zil_lwb_write_issue(zilog, tlwb); >> + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); >> + } >> + } else { >> + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); >> + } >> + >> + if (ilwbs) >> + list_insert_tail(ilwbs, lwb); >> + >> /* >> * If there was an allocation failure then nlwb will be null which >> * forces a txg_wait_synced(). >> @@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) >> ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc, >> BP_GET_LSIZE(&lwb->lwb_blk)); >> } >> - spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER); >> + ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER)); >> zil_lwb_add_block(lwb, &lwb->lwb_blk); >> lwb->lwb_issued_timestamp = gethrtime(); >> zio_nowait(lwb->lwb_root_zio); >> @@ -2037,8 +2059,7 @@ cont: >> lwb_sp < zil_max_waste_space(zilog) && >> (dlen % max_log_data == 0 || >> lwb_sp < reclen + dlen % max_log_data))) { >> - list_insert_tail(ilwbs, lwb); >> - lwb = zil_lwb_write_close(zilog, lwb); >> + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); >> if (lwb == NULL) >> return (NULL); >> zil_lwb_write_open(zilog, lwb); >> @@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog, >> zil_commit_waiter_t *zcw, list_t *ilwbs) >> zfs_commit_timeout_pct / 100; >> if (sleep < zil_min_commit_timeout || >> lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { >> - list_insert_tail(ilwbs, lwb); >> - lwb = zil_lwb_write_close(zilog, lwb); >> + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); >> zilog->zl_cur_used = 0; >> if (lwb == NULL) { >> while ((lwb = list_remove_head(ilwbs)) >> @@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, >> zil_commit_waiter_t *zcw) >> * since we've reached the commit waiter's timeout and it still >> * hasn't been issued. >> */ >> - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); >> + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL); >> >> ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); >> >> >