git: a59104fb3c - main - rewrite parser for italic/bold words
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 28 Apr 2024 11:56:13 UTC
The branch main has been updated by wosch: URL: https://cgit.FreeBSD.org/doc/commit/?id=a59104fb3ce0551abb18540b6f5f76cab57f3424 commit a59104fb3ce0551abb18540b6f5f76cab57f3424 Author: Wolfram Schneider <wosch@FreeBSD.org> AuthorDate: 2024-04-28 11:55:58 +0000 Commit: Wolfram Schneider <wosch@FreeBSD.org> CommitDate: 2024-04-28 11:55:58 +0000 rewrite parser for italic/bold words this should fix the issues with underline links and hyphens PR: 275000, 235567 --- website/content/en/cgi/man.cgi | 156 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 138 insertions(+), 18 deletions(-) diff --git a/website/content/en/cgi/man.cgi b/website/content/en/cgi/man.cgi index 24c11be53b..8ddfa7f27f 100755 --- a/website/content/en/cgi/man.cgi +++ b/website/content/en/cgi/man.cgi @@ -1999,34 +1999,154 @@ sub encode_attribute { $_; } +sub escape_word { + my $word = shift; + + return join( '', map { escape_char($_) } @$word ); +} + +sub escape_char { + my $c = shift; + + return + $c eq '&' ? "&amp;" + : $c eq '<' ? "&lt;" + : $c eq '>' ? "&gt;" + : $c eq '_BULLET_ITEM_' ? "&bull;" + : $c; +} + +sub tag_ib { + my $tag = shift; + my $word = shift; + + my $data = escape_word($word); + + return + $tag eq 'ib' ? "<i><b>$data</b></i>" + : $tag eq 'b' ? "<b>$data</b>" + : $tag eq 'i' ? "<i>$data</i>" + : $data; +} + # encode unknown text data for using as HTML, # treats ^H as overstrike ala nroff. sub encode_data { - local ($_) = @_; - local ($str); + my $line = shift; - # Escape &, < and > - s,\010[><&],,g; - s/\&/\&amp\;/g; - s/\</\&lt\;/g; - s/\>/\&gt\;/g; + # optimize for speed: most lines have no special characters + if ($line !~ /[<>&\010]/) { + return $line; + } - # bold bullet - s,\+\010\+\010o\010o,<b>o</b>,g; + # work on a list of characters + my @l = split( '', $line ); - # underline: _^H.^H(.) - s,((_\010[^_]\010.)+),($str = $1) =~ s/_\010..//g; "<I>$str</I>";,ge; + my $data = ""; + my $flag = ""; + my @word = (); - # italic: _^H(.) 
- s,((_\010[^_])+),($str = $1) =~ s/.\010//g; "<i>$str</i>";,ge; + my $end_of_word = sub { + my $new_flag = shift; - # bold: .^H(.) - s,(([^_]\010.)+),($str = $1) =~ s/.\010//g; "<b>$str</b>";,ge; + return if !scalar(@word); - # cleanup all the rest - s,.\010,,g; + # a tag ended, and a new started immediately + if ( $flag ne "" && $new_flag ne $flag ) { + $data .= tag_ib( $flag, \@word ); + @word = (); + } + }; - $_; + for ( my $i = 0 ; $i <= $#l ; $i++ ) { + + # 7 characters: +^H+^Ho^Ho - bullet list + if ( $i <= ( $#l - 6 ) + && $l[$i] eq "+" + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 2 ] eq "+" + && $l[ $i + 3 ] eq "\010" + && $l[ $i + 4 ] eq "o" + && $l[ $i + 5 ] eq "\010" + && $l[ $i + 6 ] eq "o" ) + { + push @word, '_BULLET_ITEM_'; + $i += 6; + $flag = 'b'; + } + + # 2 characters: +^Ho - bullet list + elsif ( $i <= ( $#l - 2 ) + && $l[$i] eq "+" + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 2 ] eq "o" ) + { + push @word, '_BULLET_ITEM_'; + $i += 2; + $flag = 'b'; + } + + # 5 characters: _\010x\010x - bold and italic + elsif ($i <= ( $#l - 4 ) + && $l[ $i + 1 ] eq "\010" + && $l[ $i + 3 ] eq "\010" + && $l[ $i + 2 ] eq $l[ $i + 4 ] ) + { + $end_of_word->('ib'); + push @word, $l[ $i + 2 ]; + $i += 4; + $flag = 'ib'; + } + + # 3 characters: _\010 - bold or italic + elsif ( $i <= ( $#l - 2 ) && $l[ $i + 1 ] eq "\010" ) { + + # bold + # take care of links with underlines, which are alwasy italic + if ( $l[$i] eq $l[ $i + 2 ] && $flag ne 'i' ) { + $end_of_word->('b'); + push @word, $l[$i]; + $i += 2; + $flag = 'b'; + + #printf STDERR 'B'; + } + + # italic + elsif ( $l[$i] eq "_" && $i + 2 <= $#l ) { + $end_of_word->('i'); + push @word, $l[ $i + 2 ]; + $i += 2; + $flag = 'i'; + + #printf STDERR 'I'; + } + } + + # other, one or two characters + else { + # italic/bold ends here + $end_of_word->('ANY'); + + # simple backslash + if ( $l[$i] eq "\010" ) { + + # just ignore + } + elsif ( $i <= ( $#l - 1 ) && $l[ $i + 1 ] eq "\010" ) { + $i++; + } + else { + $data .= 
escape_char( $l[$i] ); + } + $flag = ""; + } + } + + # last character + $end_of_word->('ANY'); + + return $data; } sub indexpage {