From nobody Sun Apr 28 11:56:13 2024 X-Original-To: dev-commits-doc-all@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4VS4g13dWZz5J5vQ for ; Sun, 28 Apr 2024 11:56:13 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4VS4g12rBRz4VBg; Sun, 28 Apr 2024 11:56:13 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1714305373; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=ZUeDy9kEb4BLpPvo0UgRt+ePyKLMFjkCM8zYLMyfhYg=; b=RfgAgY4YpCL5WtzwXHn6ba7gwhFYT0TdRKTHEJaqlxDBWny3KKbQP5nyPldKE0XmFFH+rd ZFUSaxHcS+C5X4hPmauzotqeuPaqAfrMPLqK/svort9v2PSp87Z7lmyGx77/wxEdTAWqGM CW0OID/6pcKL+Dd7t4nnFkFrI13aiSUFEj4Xk8nrTNZvTtTQj9Sza9fkLcARrJMR5mv6sJ HBA+ImD8ufahTe/aOQoMHThs+03sJVJYvNHxnUvMZF4c+dGDcp2434K4/QkEdXzq995obQ NV+sHztC9n44X/JdX/qi9npfmjZ0qa8KhfPH5aPN2wc4flDZY5V3/9upOt6Q2w== ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1714305373; a=rsa-sha256; cv=none; b=EtxjsLQUlVIVMwqdJdUMd5DWtZwUXa7dhyYIS+EZMq8OBitfCDsawX54asYyWxlhL7TKVM e0z6oiUOB+uQMjUx8LLw+8BjomthDm0+vEhFHkV1uC089wNeZZWHvJf7VgqrNqqZlVXLY7 Z6G5yUb3aONFITpZrQICk4eMVJgprUSGQTKosF8ebE28MEU40KlTKwc/SISEveajxMdvvl k96gtZsyYGkST5upG/FPzOhXHXP/kr7KGi5axr9tJ0ySfbJTFFRe2b8YMF5qY9mQp9hv3i ZWeXYgpODKovsCVNgRsCe/l9zK0TSqBn9lmx6i9PSU1R7jeNq8hKI90JmtkHhA== ARC-Authentication-Results: i=1; mx1.freebsd.org; none ARC-Message-Signature: i=1; 
a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1714305373; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=ZUeDy9kEb4BLpPvo0UgRt+ePyKLMFjkCM8zYLMyfhYg=; b=mge3v75Cr1GzXBN+o3YVz8cpoOj2jkkfACBakP7YUkYONn7PVl/Q2rI4Nfxi2JTUImDi+t L9QVzwUN8r4a9v268AbYE3dt6olvVZVGTFKHAEhP6jx5Rr1WzV5N0q/6oDwgWsQXsl5a5i IhuFT10ZFMwRLjvfLNTFGCAQnnhHXGBBytnZE9/4YRq7DwgWPxFAthxu4UkkB92OQzFD9U ke687YTNB6CiIpn1wXUvw9rRMHek+IHVWYZpAojVwjUD06XZyMAoeEYmfwcEePZMeReS8k IwnC2aM2PoW/cAZNRP34V/XjH59syfEJhknCuI13sD0t0lS/v4IRi3o07nhbEg== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4VS4g12SHrzPW8; Sun, 28 Apr 2024 11:56:13 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.17.1/8.17.1) with ESMTP id 43SBuDg5066462; Sun, 28 Apr 2024 11:56:13 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.17.1/8.17.1/Submit) id 43SBuDCc066459; Sun, 28 Apr 2024 11:56:13 GMT (envelope-from git) Date: Sun, 28 Apr 2024 11:56:13 GMT Message-Id: <202404281156.43SBuDCc066459@gitrepo.freebsd.org> To: doc-committers@FreeBSD.org, dev-commits-doc-all@FreeBSD.org From: Wolfram Schneider Subject: git: a59104fb3c - main - rewrite parser for italic/bold words List-Id: Commit messages for all branches of the doc repository List-Archive: https://lists.freebsd.org/archives/dev-commits-doc-all List-Help: List-Post: List-Subscribe: List-Unsubscribe: X-BeenThere: dev-commits-doc-all@freebsd.org Sender: owner-dev-commits-doc-all@FreeBSD.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 
Content-Transfer-Encoding: 8bit X-Git-Committer: wosch X-Git-Repository: doc X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: a59104fb3ce0551abb18540b6f5f76cab57f3424 Auto-Submitted: auto-generated The branch main has been updated by wosch: URL: https://cgit.FreeBSD.org/doc/commit/?id=a59104fb3ce0551abb18540b6f5f76cab57f3424 commit a59104fb3ce0551abb18540b6f5f76cab57f3424 Author: Wolfram Schneider AuthorDate: 2024-04-28 11:55:58 +0000 Commit: Wolfram Schneider CommitDate: 2024-04-28 11:55:58 +0000 rewrite parser for italic/bold words this should fix the issues with underline links and hyphens PR: 275000, 235567 --- website/content/en/cgi/man.cgi | 156 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 18 deletions(-) diff --git a/website/content/en/cgi/man.cgi b/website/content/en/cgi/man.cgi index 24c11be53b..8ddfa7f27f 100755 --- a/website/content/en/cgi/man.cgi +++ b/website/content/en/cgi/man.cgi @@ -1999,34 +1999,154 @@ sub encode_attribute { $_; } +sub escape_word { + my $word = shift; + + return join( '', map { escape_char($_) } @$word ); +} + +sub escape_char { + my $c = shift; + + return + $c eq '&' ? "&amp;" + : $c eq '<' ? "&lt;" + : $c eq '>' ? "&gt;" + : $c eq '_BULLET_ITEM_' ? "•" + : $c; +} + +sub tag_ib { + my $tag = shift; + my $word = shift; + + my $data = escape_word($word); + + return + $tag eq 'ib' ? "$data" + : $tag eq 'b' ? "$data" + : $tag eq 'i' ? "$data" + : $data; +} + # encode unknown text data for using as HTML, # treats ^H as overstrike ala nroff. sub encode_data { - local ($_) = @_; - local ($str); + my $line = shift; - # Escape &, < and > - s,\010[><&],,g; - s/\&/\&amp\;/g; - s/\</\&lt\;/g; - s/\>/\&gt\;/g; + # optimize for speed: most lines have no special characters + if ($line !~ /[<>&\010]/) { + return $line; + } - # bold bullet - s,\+\010\+\010o\010o,o,g; + # work on a list of characters + my @l = split( '', $line ); - # underline: _^H.^H(.) 
- s,((_\010[^_]\010.)+),($str = $1) =~ s/_\010..//g; "$str";,ge; + my $data = ""; + my $flag = ""; + my @word = (); - # italic: _^H(.) - s,((_\010[^_])+),($str = $1) =~ s/.\010//g; "$str";,ge; + my $end_of_word = sub { + my $new_flag = shift; - # bold: .^H(.) - s,(([^_]\010.)+),($str = $1) =~ s/.\010//g; "$str";,ge; + return if !scalar(@word); - # cleanup all the rest - s,.\010,,g; + # a tag ended, and a new started immediately + if ( $flag ne "" && $new_flag ne $flag ) { + $data .= tag_ib( $flag, \@word ); + @word = (); + } + }; - $_; + for ( my $i = 0 ; $i <= $#l ; $i++ ) { + + # 7 characters: +^H+^Ho^Ho - bullet list + if ( $i <= ( $#l - 6 ) + && $l[$i] eq "+" + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 2 ] eq "+" + && $l[ $i + 3 ] eq "\010" + && $l[ $i + 4 ] eq "o" + && $l[ $i + 5 ] eq "\010" + && $l[ $i + 6 ] eq "o" ) + { + push @word, '_BULLET_ITEM_'; + $i += 6; + $flag = 'b'; + } + + # 3 characters: +^Ho - bullet list + elsif ( $i <= ( $#l - 2 ) + && $l[$i] eq "+" + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 2 ] eq "o" ) + { + push @word, '_BULLET_ITEM_'; + $i += 2; + $flag = 'b'; + } + + # 5 characters: _\010x\010x - bold and italic + elsif ($i <= ( $#l - 4 ) + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 3 ] eq "\010" + && $l[ $i + 2 ] eq $l[ $i + 4 ] ) + { + $end_of_word->('ib'); + push @word, $l[ $i + 2 ]; + $i += 4; + $flag = 'ib'; + } + + # 3 characters: _\010 - bold or italic + elsif ( $i <= ( $#l - 2 ) && $l[ $i + 1 ] eq "\010" ) { + + # bold + # take care of links with underlines, which are always italic + if ( $l[$i] eq $l[ $i + 2 ] && $flag ne 'i' ) { + $end_of_word->('b'); + push @word, $l[$i]; + $i += 2; + $flag = 'b'; + + #printf STDERR 'B'; + } + + # italic + elsif ( $l[$i] eq "_" && $i + 2 <= $#l ) { + $end_of_word->('i'); + push @word, $l[ $i + 2 ]; + $i += 2; + $flag = 'i'; + + #printf STDERR 'I'; + } + } + + # other, one or two characters + else { + # italic/bold ends here + $end_of_word->('ANY'); + + # simple backspace + if ( $l[$i] eq "\010" ) { 
+ # just ignore + } + elsif ( $i <= ( $#l - 1 ) && $l[ $i + 1 ] eq "\010" ) { + $i++; + } + else { + $data .= escape_char( $l[$i] ); + } + $flag = ""; + } + } + + # last character + $end_of_word->('ANY'); + + return $data; } sub indexpage {