Re: git: cd25b0f740f8 - main - zfs: cherry-pick fix from openzfs

From: Martin Matuska <mm_at_FreeBSD.org>
Date: Thu, 10 Aug 2023 15:55:17 UTC
Hi Mateusz,

I am very sorry but we have to wait with full merges until 14-stable is 
branched.

After branching:
main will continue to receive updates from OpenZFS master branch
stable/14 will be updated from OpenZFS zfs-2.2-release branch

Until then everything we can do is cherry-pick commits that fix serious 
issues
everything else will make the merge management unnecessarily complicated.

Cheers,
mm

On 10. 8. 2023 16:23, Mateusz Guzik wrote:
> On 8/10/23, Martin Matuska <mm@freebsd.org> wrote:
>> The branch main has been updated by mm:
>>
>> URL:
>> https://cgit.FreeBSD.org/src/commit/?id=cd25b0f740f8c846562fd66e7380437eb898c875
>>
>> commit cd25b0f740f8c846562fd66e7380437eb898c875
>> Author:     Martin Matuska <mm@FreeBSD.org>
>> AuthorDate: 2023-08-10 07:55:42 +0000
>> Commit:     Martin Matuska <mm@FreeBSD.org>
>> CommitDate: 2023-08-10 07:56:53 +0000
>>
>>      zfs: cherry-pick fix from openzfs
>>
>>      Vendor PR:
>>        #15080 ZIL: Fix config lock deadlock
>>
>>      Obtained from:  OpenZFS
>>      OpenZFS commit: 2cb992a99ccadb78d97049b40bd442eb4fdc549d
>>
>>      Note: full vendor imports will continue when stable/14 has been
>> branched
> First a stylistic issue:
> Can you please use upstream one-liner, maybe prefixed with zfs:. For
> this commit it would be:
> zfs: ZIL: Fix config lock deadlock.
>
> then copy their commit message
>
> For example see
> https://cgit.freebsd.org/src/commit/?id=d09a955a605d03471c5ab7bd17b8a6186fdc148c
>
> A not stylistic issue is the delay between upstream fixes and them
> getting merged into FreeBSD. For example the commit at hand is over 2
> weeks old and I presume this merge got only prompted by des@ running a
> zfs-related deadlock.
>
> We really should be getting timely updates without local people
> running into them.
>
>> ---
>>   sys/contrib/openzfs/module/zfs/zil.c | 34
>> +++++++++++++++++++++++++++-------
>>   1 file changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/sys/contrib/openzfs/module/zfs/zil.c
>> b/sys/contrib/openzfs/module/zfs/zil.c
>> index 00d66a2481d7..af7137faaccf 100644
>> --- a/sys/contrib/openzfs/module/zfs/zil.c
>> +++ b/sys/contrib/openzfs/module/zfs/zil.c
>> @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache;
>>   static kmem_cache_t *zil_zcw_cache;
>>
>>   static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
>> +static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
>>   static itx_t *zil_itx_clone(itx_t *oitx);
>>
>>   static int
>> @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize =
>> SPA_OLD_MAXBLOCKSIZE;
>>    * Has to be called under zl_issuer_lock to chain more lwbs.
>>    */
>>   static lwb_t *
>> -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
>> +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
>>   {
>>   	lwb_t *nlwb = NULL;
>>   	zil_chain_t *zilc;
>> @@ -1870,6 +1871,27 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb)
>>
>>   	dmu_tx_commit(tx);
>>
>> +	/*
>> +	 * We need to acquire the config lock for the lwb to issue it later.
>> +	 * However, if we already have a queue of closed parent lwbs already
>> +	 * holding the config lock (but not yet issued), we can't block here
>> +	 * waiting on the lock or we will deadlock.  In that case we must
>> +	 * first issue to parent IOs before waiting on the lock.
>> +	 */
>> +	if (ilwbs && !list_is_empty(ilwbs)) {
>> +		if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
>> +			lwb_t *tlwb;
>> +			while ((tlwb = list_remove_head(ilwbs)) != NULL)
>> +				zil_lwb_write_issue(zilog, tlwb);
>> +			spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
>> +		}
>> +	} else {
>> +		spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
>> +	}
>> +
>> +	if (ilwbs)
>> +		list_insert_tail(ilwbs, lwb);
>> +
>>   	/*
>>   	 * If there was an allocation failure then nlwb will be null which
>>   	 * forces a txg_wait_synced().
>> @@ -1933,7 +1955,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
>>   		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
>>   		    BP_GET_LSIZE(&lwb->lwb_blk));
>>   	}
>> -	spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
>> +	ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
>>   	zil_lwb_add_block(lwb, &lwb->lwb_blk);
>>   	lwb->lwb_issued_timestamp = gethrtime();
>>   	zio_nowait(lwb->lwb_root_zio);
>> @@ -2037,8 +2059,7 @@ cont:
>>   	    lwb_sp < zil_max_waste_space(zilog) &&
>>   	    (dlen % max_log_data == 0 ||
>>   	    lwb_sp < reclen + dlen % max_log_data))) {
>> -		list_insert_tail(ilwbs, lwb);
>> -		lwb = zil_lwb_write_close(zilog, lwb);
>> +		lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
>>   		if (lwb == NULL)
>>   			return (NULL);
>>   		zil_lwb_write_open(zilog, lwb);
>> @@ -2937,8 +2958,7 @@ zil_process_commit_list(zilog_t *zilog,
>> zil_commit_waiter_t *zcw, list_t *ilwbs)
>>   			    zfs_commit_timeout_pct / 100;
>>   			if (sleep < zil_min_commit_timeout ||
>>   			    lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
>> -				list_insert_tail(ilwbs, lwb);
>> -				lwb = zil_lwb_write_close(zilog, lwb);
>> +				lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
>>   				zilog->zl_cur_used = 0;
>>   				if (lwb == NULL) {
>>   					while ((lwb = list_remove_head(ilwbs))
>> @@ -3096,7 +3116,7 @@ zil_commit_waiter_timeout(zilog_t *zilog,
>> zil_commit_waiter_t *zcw)
>>   	 * since we've reached the commit waiter's timeout and it still
>>   	 * hasn't been issued.
>>   	 */
>> -	lwb_t *nlwb = zil_lwb_write_close(zilog, lwb);
>> +	lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
>>
>>   	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
>>
>>
>