Re: git: 3a3c9242739e - main - checkstyle9.pl: Perl script to check if a change is approximately style(9)

From: Drew Gallatin <gallatin_at_freebsd.org>
Date: Sat, 25 Mar 2023 17:22:21 UTC
Thanks so much.  I've wanted something like this *forever*

On Sat, Mar 25, 2023, at 1:08 PM, Warner Losh wrote:
> The branch main has been updated by imp:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4 <https://cgit.freebsd.org/src/commit/?id=3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4>
> 
> commit 3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4
> Author:     Warner Losh <imp@FreeBSD.org>
> AuthorDate: 2023-03-14 21:28:05 +0000
> Commit:     Warner Losh <imp@FreeBSD.org>
> CommitDate: 2023-03-25 17:06:13 +0000
> 
>     checkstyle9.pl: Perl script to check if a change is approximately style(9)
>     
>     This code is adapted from the QEMU checkpatch.pl script. It can check
>     either a patch, a file or a git branch. It tries to warn about things
>     that I believe might be style(9) violations. It's experimental, since I
>     heavily hacked on the qemu version to get it to not complain (much)
>     about iconic code in the tree. At the moment, it's use should be
>     considered expermental. It will likely miss violations, and complain
>     about code that's perfectly fine.  It's offered as an experiment
>     and to make it easier for contributors to submit patches.
> ---
> tools/build/checkstyle9.pl | 2748 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 2748 insertions(+)
> 
> diff --git a/tools/build/checkstyle9.pl b/tools/build/checkstyle9.pl
> new file mode 100755
> index 000000000000..5aec3819bf7c
> --- /dev/null
> +++ b/tools/build/checkstyle9.pl
> @@ -0,0 +1,2748 @@
> +#!/usr/bin/env perl
> +# (c) 2001, Dave Jones. (the file handling bit)
> +# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
> +# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
> +# (c) 2008-2010 Andy Whitcroft <apw@canonical.com>
> +# Licensed under the terms of the GNU GPL License version 2
> +
> +use strict;
> +use warnings;
> +use Term::ANSIColor qw(:constants);
> +
> +my $P = $0;
> +$P =~ s@.*/@@g;
> +
> +our $SrcFile    = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$};
> +
> +my $V = '0.31';
> +
> +use Getopt::Long qw(:config no_auto_abbrev);
> +
> +my $quiet = 0;
> +my $tree = 1;
> +my $chk_signoff = 1;
> +my $chk_patch = undef;
> +my $chk_branch = undef;
> +my $tst_only;
> +my $emacs = 0;
> +my $terse = 0;
> +my $file = undef;
> +my $color = "auto";
> +my $no_warnings = 0;
> +my $summary = 1;
> +my $mailback = 0;
> +my $summary_file = 0;
> +my $root;
> +my %debug;
> +my $help = 0;
> +
> +sub help {
> + my ($exitcode) = @_;
> +
> + print << "EOM";
> +Usage:
> +
> +    $P [OPTION]... [FILE]...
> +    $P [OPTION]... [GIT-REV-LIST]
> +
> +Version: $V
> +
> +Options:
> +  -q, --quiet                quiet
> +  --patch                    treat FILE as patchfile
> +  --branch                   treat args as GIT revision list
> +  --emacs                    emacs compile window format
> +  --terse                    one line per report
> +  -f, --file                 treat FILE as regular source file
> +  --strict                   fail if only warnings are found
> +  --no-summary               suppress the per-file summary
> +  --mailback                 only produce a report in case of warnings/errors
> +  --summary-file             include the filename in summary
> +  --debug KEY=[0|1]          turn on/off debugging of KEY, where KEY is one of
> +                             'values', 'possible', 'type', and 'attr' (default
> +                             is all off)
> +  --test-only=WORD           report only warnings/errors containing WORD
> +                             literally
> +  --color[=WHEN]             Use colors 'always', 'never', or only when output
> +                             is a terminal ('auto'). Default is 'auto'.
> +  -h, --help, --version      display this help and exit
> +
> +When FILE is - read standard input.
> +EOM
> +
> + exit($exitcode);
> +}
> +
> +# Use at your own risk
> +print "\n", MAGENTA, "WARNING:", RESET, " This code is highly experimental ... likely isn't a great style(9) match yet\n\n";
> +
> +# Perl's Getopt::Long allows options to take optional arguments after a space.
> +# Prevent --color by itself from consuming other arguments
> +foreach (@ARGV) {
> + if ($_ eq "--color" || $_ eq "-color") {
> + $_ = "--color=$color";
> + }
> +}
> +
> +GetOptions(
> + 'q|quiet+' => \$quiet,
> + 'tree!' => \$tree,
> + 'signoff!' => \$chk_signoff,
> + 'patch!' => \$chk_patch,
> + 'branch!' => \$chk_branch,
> + 'emacs!' => \$emacs,
> + 'terse!' => \$terse,
> + 'f|file!' => \$file,
> + 'strict!' => \$no_warnings,
> + 'root=s' => \$root,
> + 'summary!' => \$summary,
> + 'mailback!' => \$mailback,
> + 'summary-file!' => \$summary_file,
> +
> + 'debug=s' => \%debug,
> + 'test-only=s' => \$tst_only,
> + 'color=s'       => \$color,
> + 'no-color'      => sub { $color = 'never'; },
> + 'h|help' => \$help,
> + 'version' => \$help
> +) or help(1);
> +
> +help(0) if ($help);
> +
> +my $exit = 0;
> +
> +if ($#ARGV < 0) {
> + print "$P: no input files\n";
> + exit(1);
> +}
> +
> +if (!defined $chk_branch && !defined $chk_patch && !defined $file) {
> + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
> + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
> + $chk_patch = $chk_branch || $file ? 0 : 1;
> +} elsif (!defined $chk_branch && !defined $chk_patch) {
> + if ($file) {
> + $chk_branch = $chk_patch = 0;
> + } else {
> + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
> + $chk_patch = $chk_branch ? 0 : 1;
> + }
> +} elsif (!defined $chk_branch && !defined $file) {
> + if ($chk_patch) {
> + $chk_branch = $file = 0;
> + } else {
> + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0;
> + $file = $chk_branch ? 0 : 1;
> + }
> +} elsif (!defined $chk_patch && !defined $file) {
> + if ($chk_branch) {
> + $chk_patch = $file = 0;
> + } else {
> + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0;
> + $chk_patch = $file ? 0 : 1;
> + }
> +} elsif (!defined $chk_branch) {
> + $chk_branch = $chk_patch || $file ? 0 : 1;
> +} elsif (!defined $chk_patch) {
> + $chk_patch = $chk_branch || $file ? 0 : 1;
> +} elsif (!defined $file) {
> + $file = $chk_patch || $chk_branch ? 0 : 1;
> +}
> +
> +if (($chk_patch && $chk_branch) ||
> +    ($chk_patch && $file) ||
> +    ($chk_branch && $file)) {
> + die "Only one of --file, --branch, --patch is permitted\n";
> +}
> +if (!$chk_patch && !$chk_branch && !$file) {
> + die "One of --file, --branch, --patch is required\n";
> +}
> +
> +if ($color =~ /^always$/i) {
> + $color = 1;
> +} elsif ($color =~ /^never$/i) {
> + $color = 0;
> +} elsif ($color =~ /^auto$/i) {
> + $color = (-t STDOUT);
> +} else {
> + die "Invalid color mode: $color\n";
> +}
> +
> +my $dbg_values = 0;
> +my $dbg_possible = 0;
> +my $dbg_type = 0;
> +my $dbg_attr = 0;
> +my $dbg_adv_dcs = 0;
> +my $dbg_adv_checking = 0;
> +my $dbg_adv_apw = 0;
> +for my $key (keys %debug) {
> + ## no critic
> + eval "\${dbg_$key} = '$debug{$key}';";
> + die "$@" if ($@);
> +}
> +
> +my $rpt_cleaners = 0;
> +
> +if ($terse) {
> + $emacs = 1;
> + $quiet++;
> +}
> +
> +my $emitted_corrupt = 0;
> +
> +our $Ident = qr{
> + [A-Za-z_][A-Za-z\d_]*
> + (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)*
> + }x;
> +our $Storage = qr{extern|static|asmlinkage};
> +our $Sparse = qr{
> + __force
> + }x;
> +
> +# Notes to $Attribute:
> +our $Attribute = qr{
> + const|
> + volatile|
> + QEMU_NORETURN|
> + QEMU_WARN_UNUSED_RESULT|
> + QEMU_SENTINEL|
> + QEMU_PACKED|
> + GCC_FMT_ATTR
> +   }x;
> +our $Modifier;
> +our $Inline = qr{inline};
> +our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]};
> +our $Lval = qr{$Ident(?:$Member)*};
> +
> +our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*};
> +our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)};
> +our $Compare    = qr{<=|>=|==|!=|<|>};
> +our $Operators = qr{
> + <=|>=|==|!=|
> + =>|->|<<|>>|<|>|!|~|
> + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%
> +   }x;
> +
> +our $NonptrType;
> +our $Type;
> +our $Declare;
> +
> +our $NON_ASCII_UTF8 = qr{
> + [\xC2-\xDF][\x80-\xBF]               # non-overlong 2-byte
> + |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
> + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
> + |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
> + |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
> + | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
> + |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
> +}x;
> +
> +our $UTF8 = qr{
> + [\x09\x0A\x0D\x20-\x7E]              # ASCII
> + | $NON_ASCII_UTF8
> +}x;
> +
> +# some readers default to ISO-8859-1 when showing email source. detect
> +# when UTF-8 is incorrectly interpreted as ISO-8859-1 and reencoded back.
> +# False positives are possible but very unlikely.
> +our $UTF8_MOJIBAKE = qr{
> + \xC3[\x82-\x9F] \xC2[\x80-\xBF]                    # c2-df 80-bf
> + | \xC3\xA0 \xC2[\xA0-\xBF] \xC2[\x80-\xBF]         # e0 a0-bf 80-bf
> + | \xC3[\xA1-\xAC\xAE\xAF] (?: \xC2[\x80-\xBF]){2}  # e1-ec/ee/ef 80-bf 80-bf
> + | \xC3\xAD \xC2[\x80-\x9F] \xC2[\x80-\xBF]         # ed 80-9f 80-bf
> + | \xC3\xB0 \xC2[\x90-\xBF] (?: \xC2[\x80-\xBF]){2} # f0 90-bf 80-bf 80-bf
> + | \xC3[\xB1-\xB3] (?: \xC2[\x80-\xBF]){3}          # f1-f3 80-bf 80-bf 80-bf
> + | \xC3\xB4 \xC2[\x80-\x8F] (?: \xC2[\x80-\xBF]){2} # f4 80-b8 80-bf 80-bf
> +}x;
> +
> +# There are still some false positives, but this catches most
> +# common cases.
> +our $typeTypedefs = qr{(?x:
> +        (?![KMGTPE]iB)                      # IEC binary prefix (do not match)
> +        [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]*     # camelcase
> +        | [A-Z][A-Z\d_]*AIOCB               # all uppercase
> +        | [A-Z][A-Z\d_]*CPU                 # all uppercase
> +        | QEMUBH                            # all uppercase
> +)};
> +
> +our @typeList = (
> + qr{void},
> + qr{(?:unsigned\s+)?char},
> + qr{(?:unsigned\s+)?short},
> + qr{(?:unsigned\s+)?int},
> + qr{(?:unsigned\s+)?long},
> + qr{(?:unsigned\s+)?long\s+int},
> + qr{(?:unsigned\s+)?long\s+long},
> + qr{(?:unsigned\s+)?long\s+long\s+int},
> + qr{unsigned},
> + qr{float},
> + qr{double},
> + qr{bool},
> + qr{struct\s+$Ident},
> + qr{union\s+$Ident},
> + qr{enum\s+$Ident},
> + qr{${Ident}_t},
> + qr{${Ident}_handler},
> + qr{${Ident}_handler_fn},
> + qr{target_(?:u)?long},
> + qr{hwaddr},
> +);
> +
> +# This can be modified by sub possible.  Since it can be empty, be careful
> +# about regexes that always match, because they can cause infinite loops.
> +our @modifierList = (
> +);
> +
> +sub build_types {
> + my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
> + if (@modifierList > 0) {
> + my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
> + $Modifier = qr{(?:$Attribute|$Sparse|$mods)};
> + } else {
> + $Modifier = qr{(?:$Attribute|$Sparse)};
> + }
> + $NonptrType = qr{
> + (?:$Modifier\s+|const\s+)*
> + (?:
> + (?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)|
> + (?:$typeTypedefs\b)|
> + (?:${all}\b)
> + )
> + (?:\s+$Modifier|\s+const)*
> +   }x;
> + $Type = qr{
> + $NonptrType
> + (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)?
> + (?:\s+$Inline|\s+$Modifier)*
> +   }x;
> + $Declare = qr{(?:$Storage\s+)?$Type};
> +}
> +build_types();
> +
> +$chk_signoff = 0 if ($file);
> +
> +my @rawlines = ();
> +my @lines = ();
> +my $vname;
> +if ($chk_branch) {
> + my @patches;
> + my %git_commits = ();
> + my $HASH;
> + open($HASH, "-|", "git", "log", "--reverse", "--no-merges", "--format=%H %s", $ARGV[0]) ||
> + die "$P: git log --reverse --no-merges --format='%H %s' $ARGV[0] failed - $!\n";
> +
> + for my $line (<$HASH>) {
> + $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/;
> + next if (!defined($1) || !defined($2));
> + my $sha1 = $1;
> + my $subject = $2;
> + push(@patches, $sha1);
> + $git_commits{$sha1} = $subject;
> + }
> +
> + close $HASH;
> +
> + die "$P: no revisions returned for revlist '$ARGV[0]'\n"
> +     unless @patches;
> +
> + my $i = 1;
> + my $num_patches = @patches;
> + for my $hash (@patches) {
> + my $FILE;
> + open($FILE, '-|', "git", "show", "--patch-with-stat", $hash) ||
> + die "$P: git show $hash - $!\n";
> + while (<$FILE>) {
> + chomp;
> + push(@rawlines, $_);
> + }
> + close($FILE);
> + $vname = substr($hash, 0, 12) . ' (' . $git_commits{$hash} . ')';
> + if ($num_patches > 1 && $quiet == 0) {
> + my $prefix = "$i/$num_patches";
> + $prefix = BLUE . BOLD . $prefix . RESET if $color;
> + print "$prefix Checking commit $vname\n";
> + $vname = "Patch $i/$num_patches";
> + } else {
> + $vname = "Commit " . $vname;
> + }
> + if (!process($hash)) {
> + $exit = 1;
> + print "\n" if ($num_patches > 1 && $quiet == 0);
> + }
> + @rawlines = ();
> + @lines = ();
> + $i++;
> + }
> +} else {
> + for my $filename (@ARGV) {
> + my $FILE;
> + if ($file) {
> + open($FILE, '-|', "diff -u /dev/null $filename") ||
> + die "$P: $filename: diff failed - $!\n";
> + } elsif ($filename eq '-') {
> + open($FILE, '<&STDIN');
> + } else {
> + open($FILE, '<', "$filename") ||
> + die "$P: $filename: open failed - $!\n";
> + }
> + if ($filename eq '-') {
> + $vname = 'Your patch';
> + } else {
> + $vname = $filename;
> + }
> + print "Checking $filename...\n" if @ARGV > 1 && $quiet == 0;
> + while (<$FILE>) {
> + chomp;
> + push(@rawlines, $_);
> + }
> + close($FILE);
> + if (!process($filename)) {
> + $exit = 1;
> + }
> + @rawlines = ();
> + @lines = ();
> + }
> +}
> +
> +exit($exit);
> +
> +sub top_of_kernel_tree {
> + my ($root) = @_;
> +
> + my @tree_check = (
> +     "Makefile.inc1", "README.md", "sys",
> +     "usr.sbin"
> + );
> +
> + foreach my $check (@tree_check) {
> + if (! -e $root . '/' . $check) {
> + return 0;
> + }
> + }
> + return 1;
> +}
> +
> +sub expand_tabs {
> + my ($str) = @_;
> +
> + my $res = '';
> + my $n = 0;
> + for my $c (split(//, $str)) {
> + if ($c eq "\t") {
> + $res .= ' ';
> + $n++;
> + for (; ($n % 8) != 0; $n++) {
> + $res .= ' ';
> + }
> + next;
> + }
> + $res .= $c;
> + $n++;
> + }
> +
> + return $res;
> +}
> +sub copy_spacing {
> + (my $res = shift) =~ tr/\t/ /c;
> + return $res;
> +}
> +
> +sub line_stats {
> + my ($line) = @_;
> +
> + # Drop the diff line leader and expand tabs
> + $line =~ s/^.//;
> + $line = expand_tabs($line);
> +
> + # Pick the indent from the front of the line.
> + my ($white) = ($line =~ /^(\s*)/);
> +
> + return (length($line), length($white));
> +}
> +
> +my $sanitise_quote = '';
> +
> +sub sanitise_line_reset {
> + my ($in_comment) = @_;
> +
> + if ($in_comment) {
> + $sanitise_quote = '*/';
> + } else {
> + $sanitise_quote = '';
> + }
> +}
> +sub sanitise_line {
> + my ($line) = @_;
> +
> + my $res = '';
> + my $l = '';
> +
> + my $qlen = 0;
> + my $off = 0;
> + my $c;
> +
> + # Always copy over the diff marker.
> + $res = substr($line, 0, 1);
> +
> + for ($off = 1; $off < length($line); $off++) {
> + $c = substr($line, $off, 1);
> +
> + # Comments we are wacking completely including the begin
> + # and end, all to $;.
> + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
> + $sanitise_quote = '*/';
> +
> + substr($res, $off, 2, "$;$;");
> + $off++;
> + next;
> + }
> + if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') {
> + $sanitise_quote = '';
> + substr($res, $off, 2, "$;$;");
> + $off++;
> + next;
> + }
> + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
> + $sanitise_quote = '//';
> +
> + substr($res, $off, 2, $sanitise_quote);
> + $off++;
> + next;
> + }
> +
> + # A \ in a string means ignore the next character.
> + if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
> +     $c eq "\\") {
> + substr($res, $off, 2, 'XX');
> + $off++;
> + next;
> + }
> + # Regular quotes.
> + if ($c eq "'" || $c eq '"') {
> + if ($sanitise_quote eq '') {
> + $sanitise_quote = $c;
> +
> + substr($res, $off, 1, $c);
> + next;
> + } elsif ($sanitise_quote eq $c) {
> + $sanitise_quote = '';
> + }
> + }
> +
> + #print "c<$c> SQ<$sanitise_quote>\n";
> + if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
> + substr($res, $off, 1, $;);
> + } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
> + substr($res, $off, 1, $;);
> + } elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
> + substr($res, $off, 1, 'X');
> + } else {
> + substr($res, $off, 1, $c);
> + }
> + }
> +
> + if ($sanitise_quote eq '//') {
> + $sanitise_quote = '';
> + }
> +
> + # The pathname on a #include may be surrounded by '<' and '>'.
> + if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
> + my $clean = 'X' x length($1);
> + $res =~ s@\<.*\>@<$clean>@;
> +
> + # The whole of a #error is a string.
> + } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) {
> + my $clean = 'X' x length($1);
> + $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@;
> + }
> +
> + return $res;
> +}
> +
> +sub ctx_statement_block {
> + my ($linenr, $remain, $off) = @_;
> + my $line = $linenr - 1;
> + my $blk = '';
> + my $soff = $off;
> + my $coff = $off - 1;
> + my $coff_set = 0;
> +
> + my $loff = 0;
> +
> + my $type = '';
> + my $level = 0;
> + my @stack = ();
> + my $p;
> + my $c;
> + my $len = 0;
> +
> + my $remainder;
> + while (1) {
> + @stack = (['', 0]) if ($#stack == -1);
> +
> + #warn "CSB: blk<$blk> remain<$remain>\n";
> + # If we are about to drop off the end, pull in more
> + # context.
> + if ($off >= $len) {
> + for (; $remain > 0; $line++) {
> + last if (!defined $lines[$line]);
> + next if ($lines[$line] =~ /^-/);
> + $remain--;
> + $loff = $len;
> + $blk .= $lines[$line] . "\n";
> + $len = length($blk);
> + $line++;
> + last;
> + }
> + # Bail if there is no further context.
> + #warn "CSB: blk<$blk> off<$off> len<$len>\n";
> + if ($off >= $len) {
> + last;
> + }
> + }
> + $p = $c;
> + $c = substr($blk, $off, 1);
> + $remainder = substr($blk, $off);
> +
> + #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
> +
> + # Handle nested #if/#else.
> + if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
> + push(@stack, [ $type, $level ]);
> + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
> + ($type, $level) = @{$stack[$#stack - 1]};
> + } elsif ($remainder =~ /^#\s*endif\b/) {
> + ($type, $level) = @{pop(@stack)};
> + }
> +
> + # Statement ends at the ';' or a close '}' at the
> + # outermost level.
> + if ($level == 0 && $c eq ';') {
> + last;
> + }
> +
> + # An else is really a conditional as long as its not else if
> + if ($level == 0 && $coff_set == 0 &&
> + (!defined($p) || $p =~ /(?:\s|\}|\+)/) &&
> + $remainder =~ /^(else)(?:\s|{)/ &&
> + $remainder !~ /^else\s+if\b/) {
> + $coff = $off + length($1) - 1;
> + $coff_set = 1;
> + #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n";
> + #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n";
> + }
> +
> + if (($type eq '' || $type eq '(') && $c eq '(') {
> + $level++;
> + $type = '(';
> + }
> + if ($type eq '(' && $c eq ')') {
> + $level--;
> + $type = ($level != 0)? '(' : '';
> +
> + if ($level == 0 && $coff < $soff) {
> + $coff = $off;
> + $coff_set = 1;
> + #warn "CSB: mark coff<$coff>\n";
> + }
> + }
> + if (($type eq '' || $type eq '{') && $c eq '{') {
> + $level++;
> + $type = '{';
> + }
> + if ($type eq '{' && $c eq '}') {
> + $level--;
> + $type = ($level != 0)? '{' : '';
> +
> + if ($level == 0) {
> + if (substr($blk, $off + 1, 1) eq ';') {
> + $off++;
> + }
> + last;
> + }
> + }
> + $off++;
> + }
> + # We are truly at the end, so shuffle to the next line.
> + if ($off == $len) {
> + $loff = $len + 1;
> + $line++;
> + $remain--;
> + }
> +
> + my $statement = substr($blk, $soff, $off - $soff + 1);
> + my $condition = substr($blk, $soff, $coff - $soff + 1);
> +
> + #warn "STATEMENT<$statement>\n";
> + #warn "CONDITION<$condition>\n";
> +
> + #print "coff<$coff> soff<$off> loff<$loff>\n";
> +
> + return ($statement, $condition,
> + $line, $remain + 1, $off - $loff + 1, $level);
> +}
> +
> +sub statement_lines {
> + my ($stmt) = @_;
> +
> + # Strip the diff line prefixes and rip blank lines at start and end.
> + $stmt =~ s/(^|\n)./$1/g;
> + $stmt =~ s/^\s*//;
> + $stmt =~ s/\s*$//;
> +
> + my @stmt_lines = ($stmt =~ /\n/g);
> +
> + return $#stmt_lines + 2;
> +}
> +
> +sub statement_rawlines {
> + my ($stmt) = @_;
> +
> + my @stmt_lines = ($stmt =~ /\n/g);
> +
> + return $#stmt_lines + 2;
> +}
> +
> +sub statement_block_size {
> + my ($stmt) = @_;
> +
> + $stmt =~ s/(^|\n)./$1/g;
> + $stmt =~ s/^\s*\{//;
> + $stmt =~ s/}\s*$//;
> + $stmt =~ s/^\s*//;
> + $stmt =~ s/\s*$//;
> +
> + my @stmt_lines = ($stmt =~ /\n/g);
> + my @stmt_statements = ($stmt =~ /;/g);
> +
> + my $stmt_lines = $#stmt_lines + 2;
> + my $stmt_statements = $#stmt_statements + 1;
> +
> + if ($stmt_lines > $stmt_statements) {
> + return $stmt_lines;
> + } else {
> + return $stmt_statements;
> + }
> +}
> +
> +sub ctx_statement_full {
> + my ($linenr, $remain, $off) = @_;
> + my ($statement, $condition, $level);
> +
> + my (@chunks);
> +
> + # Grab the first conditional/block pair.
> + ($statement, $condition, $linenr, $remain, $off, $level) =
> + ctx_statement_block($linenr, $remain, $off);
> + #print "F: c<$condition> s<$statement> remain<$remain>\n";
> + push(@chunks, [ $condition, $statement ]);
> + if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
> + return ($level, $linenr, @chunks);
> + }
> +
> + # Pull in the following conditional/block pairs and see if they
> + # could continue the statement.
> + for (;;) {
> + ($statement, $condition, $linenr, $remain, $off, $level) =
> + ctx_statement_block($linenr, $remain, $off);
> + #print "C: c<$condition> s<$statement> remain<$remain>\n";
> + last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
> + #print "C: push\n";
> + push(@chunks, [ $condition, $statement ]);
> + }
> +
> + return ($level, $linenr, @chunks);
> +}
> +
> +sub ctx_block_get {
> + my ($linenr, $remain, $outer, $open, $close, $off) = @_;
> + my $line;
> + my $start = $linenr - 1;
> + my $blk = '';
> + my @o;
> + my @c;
> + my @res = ();
> +
> + my $level = 0;
> + my @stack = ($level);
> + for ($line = $start; $remain > 0; $line++) {
> + next if ($rawlines[$line] =~ /^-/);
> + $remain--;
> +
> + $blk .= $rawlines[$line];
> +
> + # Handle nested #if/#else.
> + if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
> + push(@stack, $level);
> + } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
> + $level = $stack[$#stack - 1];
> + } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
> + $level = pop(@stack);
> + }
> +
> + foreach my $c (split(//, $lines[$line])) {
> + ##print "C<$c>L<$level><$open$close>O<$off>\n";
> + if ($off > 0) {
> + $off--;
> + next;
> + }
> +
> + if ($c eq $close && $level > 0) {
> + $level--;
> + last if ($level == 0);
> + } elsif ($c eq $open) {
> + $level++;
> + }
> + }
> +
> + if (!$outer || $level <= 1) {
> + push(@res, $rawlines[$line]);
> + }
> +
> + last if ($level == 0);
> + }
> +
> + return ($level, @res);
> +}
> +sub ctx_block_outer {
> + my ($linenr, $remain) = @_;
> +
> + my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
> + return @r;
> +}
> +sub ctx_block {
> + my ($linenr, $remain) = @_;
> +
> + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
> + return @r;
> +}
> +sub ctx_statement {
> + my ($linenr, $remain, $off) = @_;
> +
> + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
> + return @r;
> +}
> +sub ctx_block_level {
> + my ($linenr, $remain) = @_;
> +
> + return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
> +}
> +sub ctx_statement_level {
> + my ($linenr, $remain, $off) = @_;
> +
> + return ctx_block_get($linenr, $remain, 0, '(', ')', $off);
> +}
> +
> +sub ctx_locate_comment {
> + my ($first_line, $end_line) = @_;
> +
> + # Catch a comment on the end of the line itself.
> + my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
> + return $current_comment if (defined $current_comment);
> +
> + # Look through the context and try and figure out if there is a
> + # comment.
> + my $in_comment = 0;
> + $current_comment = '';
> + for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
> + my $line = $rawlines[$linenr - 1];
> + #warn "           $line\n";
> + if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
> + $in_comment = 1;
> + }
> + if ($line =~ m@/\*@) {
> + $in_comment = 1;
> + }
> + if (!$in_comment && $current_comment ne '') {
> + $current_comment = '';
> + }
> + $current_comment .= $line . "\n" if ($in_comment);
> + if ($line =~ m@\*/@) {
> + $in_comment = 0;
> + }
> + }
> +
> + chomp($current_comment);
> + return($current_comment);
> +}
> +sub ctx_has_comment {
> + my ($first_line, $end_line) = @_;
> + my $cmt = ctx_locate_comment($first_line, $end_line);
> +
> + ##print "LINE: $rawlines[$end_line - 1 ]\n";
> + ##print "CMMT: $cmt\n";
> +
> + return ($cmt ne '');
> +}
> +
> +sub raw_line {
> + my ($linenr, $cnt) = @_;
> +
> + my $offset = $linenr - 1;
> + $cnt++;
> +
> + my $line;
> + while ($cnt) {
> + $line = $rawlines[$offset++];
> + next if (defined($line) && $line =~ /^-/);
> + $cnt--;
> + }
> +
> + return $line;
> +}
> +
> +sub cat_vet {
> + my ($vet) = @_;
> + my ($res, $coded);
> +
> + $res = '';
> + while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) {
> + $res .= $1;
> + if ($2 ne '') {
> + $coded = sprintf("^%c", unpack('C', $2) + 64);
> + $res .= $coded;
> + }
> + }
> + $res =~ s/$/\$/;
> +
> + return $res;
> +}
> +
> +my $av_preprocessor = 0;
> +my $av_pending;
> +my @av_paren_type;
> +my $av_pend_colon;
> +
> +sub annotate_reset {
> + $av_preprocessor = 0;
> + $av_pending = '_';
> + @av_paren_type = ('E');
> + $av_pend_colon = 'O';
> +}
> +
> +sub annotate_values {
> + my ($stream, $type) = @_;
> +
> + my $res;
> + my $var = '_' x length($stream);
> + my $cur = $stream;
> +
> + print "$stream\n" if ($dbg_values > 1);
> +
> + while (length($cur)) {
> + @av_paren_type = ('E') if ($#av_paren_type < 0);
> + print " <" . join('', @av_paren_type) .
> + "> <$type> <$av_pending>" if ($dbg_values > 1);
> + if ($cur =~ /^(\s+)/o) {
> + print "WS($1)\n" if ($dbg_values > 1);
> + if ($1 =~ /\n/ && $av_preprocessor) {
> + $type = pop(@av_paren_type);
> + $av_preprocessor = 0;
> + }
> +
> + } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') {
> + print "CAST($1)\n" if ($dbg_values > 1);
> + push(@av_paren_type, $type);
> + $type = 'C';
> +
> + } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) {
> + print "DECLARE($1)\n" if ($dbg_values > 1);
> + $type = 'T';
> +
> + } elsif ($cur =~ /^($Modifier)\s*/) {
> + print "MODIFIER($1)\n" if ($dbg_values > 1);
> + $type = 'T';
> +
> + } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) {
> + print "DEFINE($1,$2)\n" if ($dbg_values > 1);
> *** 1791 LINES SKIPPED ***
>