Re: git: 3a3c9242739e - main - checkstyle9.pl: Perl script to check if a change is approximately style(9)
Date: Sat, 25 Mar 2023 17:24:54 UTC
Yea, I did most of the work on this 18 months ago or so, but the shit in core and subsequent nastiness derailed it til now. It's gotten to 'decent or better' at things, but far from perfect. I'm sure that now the bug reports will come. Warner On Sat, Mar 25, 2023 at 6:22 PM Drew Gallatin <gallatin@freebsd.org> wrote: > Thanks so much. I've wanted something like this *forever* > > On Sat, Mar 25, 2023, at 1:08 PM, Warner Losh wrote: > > The branch main has been updated by imp: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4 > > commit 3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4 > Author: Warner Losh <imp@FreeBSD.org> > AuthorDate: 2023-03-14 21:28:05 +0000 > Commit: Warner Losh <imp@FreeBSD.org> > CommitDate: 2023-03-25 17:06:13 +0000 > > checkstyle9.pl: Perl script to check if a change is approximately > style(9) > > This code is adapted from the QEMU checkpatch.pl script. It can check > either a patch, a file or a git branch. It tries to warn about things > that I believe might be style(9) violations. It's experimental, since I > heavily hacked on the qemu version to get it to not complain (much) > about iconic code in the tree. At the moment, it's use should be > considered expermental. It will likely miss violations, and complain > about code that's perfectly fine. It's offered as an experiment > and to make it easier for contributors to submit patches. > --- > tools/build/checkstyle9.pl | 2748 > ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 2748 insertions(+) > > diff --git a/tools/build/checkstyle9.pl b/tools/build/checkstyle9.pl > new file mode 100755 > index 000000000000..5aec3819bf7c > --- /dev/null > +++ b/tools/build/checkstyle9.pl > @@ -0,0 +1,2748 @@ > +#!/usr/bin/env perl > +# (c) 2001, Dave Jones. (the file handling bit) > +# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) > +# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test > suite) > +# (c) 2008-2010 Andy Whitcroft <apw@canonical.com> > +# Licensed under the terms of the GNU GPL License version 2 > + > +use strict; > +use warnings; > +use Term::ANSIColor qw(:constants); > + > +my $P = $0; > +$P =~ s@.*/@@g; > + > +our $SrcFile = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$}; > + > +my $V = '0.31'; > + > +use Getopt::Long qw(:config no_auto_abbrev); > + > +my $quiet = 0; > +my $tree = 1; > +my $chk_signoff = 1; > +my $chk_patch = undef; > +my $chk_branch = undef; > +my $tst_only; > +my $emacs = 0; > +my $terse = 0; > +my $file = undef; > +my $color = "auto"; > +my $no_warnings = 0; > +my $summary = 1; > +my $mailback = 0; > +my $summary_file = 0; > +my $root; > +my %debug; > +my $help = 0; > + > +sub help { > + my ($exitcode) = @_; > + > + print << "EOM"; > +Usage: > + > + $P [OPTION]... [FILE]... > + $P [OPTION]... [GIT-REV-LIST] > + > +Version: $V > + > +Options: > + -q, --quiet quiet > + --patch treat FILE as patchfile > + --branch treat args as GIT revision list > + --emacs emacs compile window format > + --terse one line per report > + -f, --file treat FILE as regular source file > + --strict fail if only warnings are found > + --no-summary suppress the per-file summary > + --mailback only produce a report in case of > warnings/errors > + --summary-file include the filename in summary > + --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is > one of > + 'values', 'possible', 'type', and 'attr' > (default > + is all off) > + --test-only=WORD report only warnings/errors containing WORD > + literally > + --color[=WHEN] Use colors 'always', 'never', or only when > output > + is a terminal ('auto'). Default is 'auto'. > + -h, --help, --version display this help and exit > + > +When FILE is - read standard input. > +EOM > + > + exit($exitcode); > +} > + > +# Use at your own risk > +print "\n", MAGENTA, "WARNING:", RESET, " This code is highly > experimental ... likely isn't a great style(9) match yet\n\n"; > + > +# Perl's Getopt::Long allows options to take optional arguments after a > space. > +# Prevent --color by itself from consuming other arguments > +foreach (@ARGV) { > + if ($_ eq "--color" || $_ eq "-color") { > + $_ = "--color=$color"; > + } > +} > + > +GetOptions( > + 'q|quiet+' => \$quiet, > + 'tree!' => \$tree, > + 'signoff!' => \$chk_signoff, > + 'patch!' => \$chk_patch, > + 'branch!' => \$chk_branch, > + 'emacs!' => \$emacs, > + 'terse!' => \$terse, > + 'f|file!' => \$file, > + 'strict!' => \$no_warnings, > + 'root=s' => \$root, > + 'summary!' => \$summary, > + 'mailback!' => \$mailback, > + 'summary-file!' => \$summary_file, > + > + 'debug=s' => \%debug, > + 'test-only=s' => \$tst_only, > + 'color=s' => \$color, > + 'no-color' => sub { $color = 'never'; }, > + 'h|help' => \$help, > + 'version' => \$help > +) or help(1); > + > +help(0) if ($help); > + > +my $exit = 0; > + > +if ($#ARGV < 0) { > + print "$P: no input files\n"; > + exit(1); > +} > + > +if (!defined $chk_branch && !defined $chk_patch && !defined $file) { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; > + $chk_patch = $chk_branch || $file ? 0 : 1; > +} elsif (!defined $chk_branch && !defined $chk_patch) { > + if ($file) { > + $chk_branch = $chk_patch = 0; > + } else { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $chk_patch = $chk_branch ? 0 : 1; > + } > +} elsif (!defined $chk_branch && !defined $file) { > + if ($chk_patch) { > + $chk_branch = $file = 0; > + } else { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $file = $chk_branch ? 0 : 1; > + } > +} elsif (!defined $chk_patch && !defined $file) { > + if ($chk_branch) { > + $chk_patch = $file = 0; > + } else { > + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; > + $chk_patch = $file ? 0 : 1; > + } > +} elsif (!defined $chk_branch) { > + $chk_branch = $chk_patch || $file ? 0 : 1; > +} elsif (!defined $chk_patch) { > + $chk_patch = $chk_branch || $file ? 0 : 1; > +} elsif (!defined $file) { > + $file = $chk_patch || $chk_branch ? 0 : 1; > +} > + > +if (($chk_patch && $chk_branch) || > + ($chk_patch && $file) || > + ($chk_branch && $file)) { > + die "Only one of --file, --branch, --patch is permitted\n"; > +} > +if (!$chk_patch && !$chk_branch && !$file) { > + die "One of --file, --branch, --patch is required\n"; > +} > + > +if ($color =~ /^always$/i) { > + $color = 1; > +} elsif ($color =~ /^never$/i) { > + $color = 0; > +} elsif ($color =~ /^auto$/i) { > + $color = (-t STDOUT); > +} else { > + die "Invalid color mode: $color\n"; > +} > + > +my $dbg_values = 0; > +my $dbg_possible = 0; > +my $dbg_type = 0; > +my $dbg_attr = 0; > +my $dbg_adv_dcs = 0; > +my $dbg_adv_checking = 0; > +my $dbg_adv_apw = 0; > +for my $key (keys %debug) { > + ## no critic > + eval "\${dbg_$key} = '$debug{$key}';"; > + die "$@" if ($@); > +} > + > +my $rpt_cleaners = 0; > + > +if ($terse) { > + $emacs = 1; > + $quiet++; > +} > + > +my $emitted_corrupt = 0; > + > +our $Ident = qr{ > + [A-Za-z_][A-Za-z\d_]* > + (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)* > + }x; > +our $Storage = qr{extern|static|asmlinkage}; > +our $Sparse = qr{ > + __force > + }x; > + > +# Notes to $Attribute: > +our $Attribute = qr{ > + const| > + volatile| > + QEMU_NORETURN| > + QEMU_WARN_UNUSED_RESULT| > + QEMU_SENTINEL| > + QEMU_PACKED| > + GCC_FMT_ATTR > + }x; > +our $Modifier; > +our $Inline = qr{inline}; > +our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; > +our $Lval = qr{$Ident(?:$Member)*}; > + > +our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*}; > +our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)}; > +our $Compare = qr{<=|>=|==|!=|<|>}; > +our $Operators = qr{ > + <=|>=|==|!=| > + =>|->|<<|>>|<|>|!|~| > + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|% > + }x; > + > +our $NonptrType; > +our $Type; > +our $Declare; > + > +our $NON_ASCII_UTF8 = qr{ > + [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte > + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs > + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte > + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates > + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 > + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 > + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 > +}x; > + > +our $UTF8 = qr{ > + [\x09\x0A\x0D\x20-\x7E] # ASCII > + | $NON_ASCII_UTF8 > +}x; > + > +# some readers default to ISO-8859-1 when showing email source. detect > +# when UTF-8 is incorrectly interpreted as ISO-8859-1 and reencoded back. > +# False positives are possible but very unlikely. > +our $UTF8_MOJIBAKE = qr{ > + \xC3[\x82-\x9F] \xC2[\x80-\xBF] # c2-df 80-bf > + | \xC3\xA0 \xC2[\xA0-\xBF] \xC2[\x80-\xBF] # e0 a0-bf 80-bf > + | \xC3[\xA1-\xAC\xAE\xAF] (?: \xC2[\x80-\xBF]){2} # e1-ec/ee/ef 80-bf > 80-bf > + | \xC3\xAD \xC2[\x80-\x9F] \xC2[\x80-\xBF] # ed 80-9f 80-bf > + | \xC3\xB0 \xC2[\x90-\xBF] (?: \xC2[\x80-\xBF]){2} # f0 90-bf 80-bf 80-bf > + | \xC3[\xB1-\xB3] (?: \xC2[\x80-\xBF]){3} # f1-f3 80-bf 80-bf > 80-bf > + | \xC3\xB4 \xC2[\x80-\x8F] (?: \xC2[\x80-\xBF]){2} # f4 80-b8 80-bf 80-bf > +}x; > + > +# There are still some false positives, but this catches most > +# common cases. > +our $typeTypedefs = qr{(?x: > + (?![KMGTPE]iB) # IEC binary prefix (do not > match) > + [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]* # camelcase > + | [A-Z][A-Z\d_]*AIOCB # all uppercase > + | [A-Z][A-Z\d_]*CPU # all uppercase > + | QEMUBH # all uppercase > +)}; > + > +our @typeList = ( > + qr{void}, > + qr{(?:unsigned\s+)?char}, > + qr{(?:unsigned\s+)?short}, > + qr{(?:unsigned\s+)?int}, > + qr{(?:unsigned\s+)?long}, > + qr{(?:unsigned\s+)?long\s+int}, > + qr{(?:unsigned\s+)?long\s+long}, > + qr{(?:unsigned\s+)?long\s+long\s+int}, > + qr{unsigned}, > + qr{float}, > + qr{double}, > + qr{bool}, > + qr{struct\s+$Ident}, > + qr{union\s+$Ident}, > + qr{enum\s+$Ident}, > + qr{${Ident}_t}, > + qr{${Ident}_handler}, > + qr{${Ident}_handler_fn}, > + qr{target_(?:u)?long}, > + qr{hwaddr}, > +); > + > +# This can be modified by sub possible. Since it can be empty, be careful > +# about regexes that always match, because they can cause infinite loops. > +our @modifierList = ( > +); > + > +sub build_types { > + my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; > + if (@modifierList > 0) { > + my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; > + $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; > + } else { > + $Modifier = qr{(?:$Attribute|$Sparse)}; > + } > + $NonptrType = qr{ > + (?:$Modifier\s+|const\s+)* > + (?: > + (?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)| > + (?:$typeTypedefs\b)| > + (?:${all}\b) > + ) > + (?:\s+$Modifier|\s+const)* > + }x; > + $Type = qr{ > + $NonptrType > + (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)? > + (?:\s+$Inline|\s+$Modifier)* > + }x; > + $Declare = qr{(?:$Storage\s+)?$Type}; > +} > +build_types(); > + > +$chk_signoff = 0 if ($file); > + > +my @rawlines = (); > +my @lines = (); > +my $vname; > +if ($chk_branch) { > + my @patches; > + my %git_commits = (); > + my $HASH; > + open($HASH, "-|", "git", "log", "--reverse", "--no-merges", "--format=%H > %s", $ARGV[0]) || > + die "$P: git log --reverse --no-merges --format='%H %s' $ARGV[0] failed > - $!\n"; > + > + for my $line (<$HASH>) { > + $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/; > + next if (!defined($1) || !defined($2)); > + my $sha1 = $1; > + my $subject = $2; > + push(@patches, $sha1); > + $git_commits{$sha1} = $subject; > + } > + > + close $HASH; > + > + die "$P: no revisions returned for revlist '$ARGV[0]'\n" > + unless @patches; > + > + my $i = 1; > + my $num_patches = @patches; > + for my $hash (@patches) { > + my $FILE; > + open($FILE, '-|', "git", "show", "--patch-with-stat", $hash) || > + die "$P: git show $hash - $!\n"; > + while (<$FILE>) { > + chomp; > + push(@rawlines, $_); > + } > + close($FILE); > + $vname = substr($hash, 0, 12) . ' (' . $git_commits{$hash} . ')'; > + if ($num_patches > 1 && $quiet == 0) { > + my $prefix = "$i/$num_patches"; > + $prefix = BLUE . BOLD . $prefix . RESET if $color; > + print "$prefix Checking commit $vname\n"; > + $vname = "Patch $i/$num_patches"; > + } else { > + $vname = "Commit " . $vname; > + } > + if (!process($hash)) { > + $exit = 1; > + print "\n" if ($num_patches > 1 && $quiet == 0); > + } > + @rawlines = (); > + @lines = (); > + $i++; > + } > +} else { > + for my $filename (@ARGV) { > + my $FILE; > + if ($file) { > + open($FILE, '-|', "diff -u /dev/null $filename") || > + die "$P: $filename: diff failed - $!\n"; > + } elsif ($filename eq '-') { > + open($FILE, '<&STDIN'); > + } else { > + open($FILE, '<', "$filename") || > + die "$P: $filename: open failed - $!\n"; > + } > + if ($filename eq '-') { > + $vname = 'Your patch'; > + } else { > + $vname = $filename; > + } > + print "Checking $filename...\n" if @ARGV > 1 && $quiet == 0; > + while (<$FILE>) { > + chomp; > + push(@rawlines, $_); > + } > + close($FILE); > + if (!process($filename)) { > + $exit = 1; > + } > + @rawlines = (); > + @lines = (); > + } > +} > + > +exit($exit); > + > +sub top_of_kernel_tree { > + my ($root) = @_; > + > + my @tree_check = ( > + "Makefile.inc1", "README.md", "sys", > + "usr.sbin" > + ); > + > + foreach my $check (@tree_check) { > + if (! -e $root . '/' . $check) { > + return 0; > + } > + } > + return 1; > +} > + > +sub expand_tabs { > + my ($str) = @_; > + > + my $res = ''; > + my $n = 0; > + for my $c (split(//, $str)) { > + if ($c eq "\t") { > + $res .= ' '; > + $n++; > + for (; ($n % 8) != 0; $n++) { > + $res .= ' '; > + } > + next; > + } > + $res .= $c; > + $n++; > + } > + > + return $res; > +} > +sub copy_spacing { > + (my $res = shift) =~ tr/\t/ /c; > + return $res; > +} > + > +sub line_stats { > + my ($line) = @_; > + > + # Drop the diff line leader and expand tabs > + $line =~ s/^.//; > + $line = expand_tabs($line); > + > + # Pick the indent from the front of the line. > + my ($white) = ($line =~ /^(\s*)/); > + > + return (length($line), length($white)); > +} > + > +my $sanitise_quote = ''; > + > +sub sanitise_line_reset { > + my ($in_comment) = @_; > + > + if ($in_comment) { > + $sanitise_quote = '*/'; > + } else { > + $sanitise_quote = ''; > + } > +} > +sub sanitise_line { > + my ($line) = @_; > + > + my $res = ''; > + my $l = ''; > + > + my $qlen = 0; > + my $off = 0; > + my $c; > + > + # Always copy over the diff marker. > + $res = substr($line, 0, 1); > + > + for ($off = 1; $off < length($line); $off++) { > + $c = substr($line, $off, 1); > + > + # Comments we are wacking completely including the begin > + # and end, all to $;. > + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') { > + $sanitise_quote = '*/'; > + > + substr($res, $off, 2, "$;$;"); > + $off++; > + next; > + } > + if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') { > + $sanitise_quote = ''; > + substr($res, $off, 2, "$;$;"); > + $off++; > + next; > + } > + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') { > + $sanitise_quote = '//'; > + > + substr($res, $off, 2, $sanitise_quote); > + $off++; > + next; > + } > + > + # A \ in a string means ignore the next character. > + if (($sanitise_quote eq "'" || $sanitise_quote eq '"') && > + $c eq "\\") { > + substr($res, $off, 2, 'XX'); > + $off++; > + next; > + } > + # Regular quotes. > + if ($c eq "'" || $c eq '"') { > + if ($sanitise_quote eq '') { > + $sanitise_quote = $c; > + > + substr($res, $off, 1, $c); > + next; > + } elsif ($sanitise_quote eq $c) { > + $sanitise_quote = ''; > + } > + } > + > + #print "c<$c> SQ<$sanitise_quote>\n"; > + if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { > + substr($res, $off, 1, $;); > + } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") { > + substr($res, $off, 1, $;); > + } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { > + substr($res, $off, 1, 'X'); > + } else { > + substr($res, $off, 1, $c); > + } > + } > + > + if ($sanitise_quote eq '//') { > + $sanitise_quote = ''; > + } > + > + # The pathname on a #include may be surrounded by '<' and '>'. > + if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) { > + my $clean = 'X' x length($1); > + $res =~ s@\<.*\>@<$clean>@; > + > + # The whole of a #error is a string. > + } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) { > + my $clean = 'X' x length($1); > + $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@; > + } > + > + return $res; > +} > + > +sub ctx_statement_block { > + my ($linenr, $remain, $off) = @_; > + my $line = $linenr - 1; > + my $blk = ''; > + my $soff = $off; > + my $coff = $off - 1; > + my $coff_set = 0; > + > + my $loff = 0; > + > + my $type = ''; > + my $level = 0; > + my @stack = (); > + my $p; > + my $c; > + my $len = 0; > + > + my $remainder; > + while (1) { > + @stack = (['', 0]) if ($#stack == -1); > + > + #warn "CSB: blk<$blk> remain<$remain>\n"; > + # If we are about to drop off the end, pull in more > + # context. > + if ($off >= $len) { > + for (; $remain > 0; $line++) { > + last if (!defined $lines[$line]); > + next if ($lines[$line] =~ /^-/); > + $remain--; > + $loff = $len; > + $blk .= $lines[$line] . "\n"; > + $len = length($blk); > + $line++; > + last; > + } > + # Bail if there is no further context. > + #warn "CSB: blk<$blk> off<$off> len<$len>\n"; > + if ($off >= $len) { > + last; > + } > + } > + $p = $c; > + $c = substr($blk, $off, 1); > + $remainder = substr($blk, $off); > + > + #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> > coff_set<$coff_set>\n"; > + > + # Handle nested #if/#else. > + if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) { > + push(@stack, [ $type, $level ]); > + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) { > + ($type, $level) = @{$stack[$#stack - 1]}; > + } elsif ($remainder =~ /^#\s*endif\b/) { > + ($type, $level) = @{pop(@stack)}; > + } > + > + # Statement ends at the ';' or a close '}' at the > + # outermost level. > + if ($level == 0 && $c eq ';') { > + last; > + } > + > + # An else is really a conditional as long as its not else if > + if ($level == 0 && $coff_set == 0 && > + (!defined($p) || $p =~ /(?:\s|\}|\+)/) && > + $remainder =~ /^(else)(?:\s|{)/ && > + $remainder !~ /^else\s+if\b/) { > + $coff = $off + length($1) - 1; > + $coff_set = 1; > + #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n"; > + #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n"; > + } > + > + if (($type eq '' || $type eq '(') && $c eq '(') { > + $level++; > + $type = '('; > + } > + if ($type eq '(' && $c eq ')') { > + $level--; > + $type = ($level != 0)? '(' : ''; > + > + if ($level == 0 && $coff < $soff) { > + $coff = $off; > + $coff_set = 1; > + #warn "CSB: mark coff<$coff>\n"; > + } > + } > + if (($type eq '' || $type eq '{') && $c eq '{') { > + $level++; > + $type = '{'; > + } > + if ($type eq '{' && $c eq '}') { > + $level--; > + $type = ($level != 0)? '{' : ''; > + > + if ($level == 0) { > + if (substr($blk, $off + 1, 1) eq ';') { > + $off++; > + } > + last; > + } > + } > + $off++; > + } > + # We are truly at the end, so shuffle to the next line. > + if ($off == $len) { > + $loff = $len + 1; > + $line++; > + $remain--; > + } > + > + my $statement = substr($blk, $soff, $off - $soff + 1); > + my $condition = substr($blk, $soff, $coff - $soff + 1); > + > + #warn "STATEMENT<$statement>\n"; > + #warn "CONDITION<$condition>\n"; > + > + #print "coff<$coff> soff<$off> loff<$loff>\n"; > + > + return ($statement, $condition, > + $line, $remain + 1, $off - $loff + 1, $level); > +} > + > +sub statement_lines { > + my ($stmt) = @_; > + > + # Strip the diff line prefixes and rip blank lines at start and end. > + $stmt =~ s/(^|\n)./$1/g; > + $stmt =~ s/^\s*//; > + $stmt =~ s/\s*$//; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + > + return $#stmt_lines + 2; > +} > + > +sub statement_rawlines { > + my ($stmt) = @_; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + > + return $#stmt_lines + 2; > +} > + > +sub statement_block_size { > + my ($stmt) = @_; > + > + $stmt =~ s/(^|\n)./$1/g; > + $stmt =~ s/^\s*\{//; > + $stmt =~ s/}\s*$//; > + $stmt =~ s/^\s*//; > + $stmt =~ s/\s*$//; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + my @stmt_statements = ($stmt =~ /;/g); > + > + my $stmt_lines = $#stmt_lines + 2; > + my $stmt_statements = $#stmt_statements + 1; > + > + if ($stmt_lines > $stmt_statements) { > + return $stmt_lines; > + } else { > + return $stmt_statements; > + } > +} > + > +sub ctx_statement_full { > + my ($linenr, $remain, $off) = @_; > + my ($statement, $condition, $level); > + > + my (@chunks); > + > + # Grab the first conditional/block pair. > + ($statement, $condition, $linenr, $remain, $off, $level) = > + ctx_statement_block($linenr, $remain, $off); > + #print "F: c<$condition> s<$statement> remain<$remain>\n"; > + push(@chunks, [ $condition, $statement ]); > + if (!($remain > 0 && $condition =~ > /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) { > + return ($level, $linenr, @chunks); > + } > + > + # Pull in the following conditional/block pairs and see if they > + # could continue the statement. > + for (;;) { > + ($statement, $condition, $linenr, $remain, $off, $level) = > + ctx_statement_block($linenr, $remain, $off); > + #print "C: c<$condition> s<$statement> remain<$remain>\n"; > + last if (!($remain > 0 && $condition =~ > /^(?:\s*\n[+-])*\s*(?:else|do)\b/s)); > + #print "C: push\n"; > + push(@chunks, [ $condition, $statement ]); > + } > + > + return ($level, $linenr, @chunks); > +} > + > +sub ctx_block_get { > + my ($linenr, $remain, $outer, $open, $close, $off) = @_; > + my $line; > + my $start = $linenr - 1; > + my $blk = ''; > + my @o; > + my @c; > + my @res = (); > + > + my $level = 0; > + my @stack = ($level); > + for ($line = $start; $remain > 0; $line++) { > + next if ($rawlines[$line] =~ /^-/); > + $remain--; > + > + $blk .= $rawlines[$line]; > + > + # Handle nested #if/#else. > + if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) { > + push(@stack, $level); > + } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) { > + $level = $stack[$#stack - 1]; > + } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) { > + $level = pop(@stack); > + } > + > + foreach my $c (split(//, $lines[$line])) { > + ##print "C<$c>L<$level><$open$close>O<$off>\n"; > + if ($off > 0) { > + $off--; > + next; > + } > + > + if ($c eq $close && $level > 0) { > + $level--; > + last if ($level == 0); > + } elsif ($c eq $open) { > + $level++; > + } > + } > + > + if (!$outer || $level <= 1) { > + push(@res, $rawlines[$line]); > + } > + > + last if ($level == 0); > + } > + > + return ($level, @res); > +} > +sub ctx_block_outer { > + my ($linenr, $remain) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0); > + return @r; > +} > +sub ctx_block { > + my ($linenr, $remain) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0); > + return @r; > +} > +sub ctx_statement { > + my ($linenr, $remain, $off) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off); > + return @r; > +} > +sub ctx_block_level { > + my ($linenr, $remain) = @_; > + > + return ctx_block_get($linenr, $remain, 0, '{', '}', 0); > +} > +sub ctx_statement_level { > + my ($linenr, $remain, $off) = @_; > + > + return ctx_block_get($linenr, $remain, 0, '(', ')', $off); > +} > + > +sub ctx_locate_comment { > + my ($first_line, $end_line) = @_; > + > + # Catch a comment on the end of the line itself. > + my ($current_comment) = ($rawlines[$end_line - 1] =~ m@ > .*(/\*.*\*/)\s*(?:\\\s*)?$@); > + return $current_comment if (defined $current_comment); > + > + # Look through the context and try and figure out if there is a > + # comment. > + my $in_comment = 0; > + $current_comment = ''; > + for (my $linenr = $first_line; $linenr < $end_line; $linenr++) { > + my $line = $rawlines[$linenr - 1]; > + #warn " $line\n"; > + if ($linenr == $first_line and $line =~ m@^.\s*\*@) { > + $in_comment = 1; > + } > + if ($line =~ m@/\*@) { > + $in_comment = 1; > + } > + if (!$in_comment && $current_comment ne '') { > + $current_comment = ''; > + } > + $current_comment .= $line . "\n" if ($in_comment); > + if ($line =~ m@\*/@) { > + $in_comment = 0; > + } > + } > + > + chomp($current_comment); > + return($current_comment); > +} > +sub ctx_has_comment { > + my ($first_line, $end_line) = @_; > + my $cmt = ctx_locate_comment($first_line, $end_line); > + > + ##print "LINE: $rawlines[$end_line - 1 ]\n"; > + ##print "CMMT: $cmt\n"; > + > + return ($cmt ne ''); > +} > + > +sub raw_line { > + my ($linenr, $cnt) = @_; > + > + my $offset = $linenr - 1; > + $cnt++; > + > + my $line; > + while ($cnt) { > + $line = $rawlines[$offset++]; > + next if (defined($line) && $line =~ /^-/); > + $cnt--; > + } > + > + return $line; > +} > + > +sub cat_vet { > + my ($vet) = @_; > + my ($res, $coded); > + > + $res = ''; > + while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) { > + $res .= $1; > + if ($2 ne '') { > + $coded = sprintf("^%c", unpack('C', $2) + 64); > + $res .= $coded; > + } > + } > + $res =~ s/$/\$/; > + > + return $res; > +} > + > +my $av_preprocessor = 0; > +my $av_pending; > +my @av_paren_type; > +my $av_pend_colon; > + > +sub annotate_reset { > + $av_preprocessor = 0; > + $av_pending = '_'; > + @av_paren_type = ('E'); > + $av_pend_colon = 'O'; > +} > + > +sub annotate_values { > + my ($stream, $type) = @_; > + > + my $res; > + my $var = '_' x length($stream); > + my $cur = $stream; > + > + print "$stream\n" if ($dbg_values > 1); > + > + while (length($cur)) { > + @av_paren_type = ('E') if ($#av_paren_type < 0); > + print " <" . join('', @av_paren_type) . > + "> <$type> <$av_pending>" if ($dbg_values > 1); > + if ($cur =~ /^(\s+)/o) { > + print "WS($1)\n" if ($dbg_values > 1); > + if ($1 =~ /\n/ && $av_preprocessor) { > + $type = pop(@av_paren_type); > + $av_preprocessor = 0; > + } > + > + } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') { > + print "CAST($1)\n" if ($dbg_values > 1); > + push(@av_paren_type, $type); > + $type = 'C'; > + > + } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) { > + print "DECLARE($1)\n" if ($dbg_values > 1); > + $type = 'T'; > + > + } elsif ($cur =~ /^($Modifier)\s*/) { > + print "MODIFIER($1)\n" if ($dbg_values > 1); > + $type = 'T'; > + > + } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) { > + print "DEFINE($1,$2)\n" if ($dbg_values > 1); > *** 1791 LINES SKIPPED *** > > >