Re: git: 3a3c9242739e - main - checkstyle9.pl: Perl script to check if a change is approximately style(9)
- Reply: Warner Losh : "Re: git: 3a3c9242739e - main - checkstyle9.pl: Perl script to check if a change is approximately style(9)"
- In reply to: Warner Losh : "git: 3a3c9242739e - main - checkstyle9.pl: Perl script to check if a change is approximately style(9)"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 25 Mar 2023 17:22:21 UTC
Thanks so much. I've wanted something like this *forever* On Sat, Mar 25, 2023, at 1:08 PM, Warner Losh wrote: > The branch main has been updated by imp: > > URL: https://cgit.FreeBSD.org/src/commit/?id=3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4 <https://cgit.freebsd.org/src/commit/?id=3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4> > > commit 3a3c9242739efb0c76587ffbaa54c5d10b2cbcb4 > Author: Warner Losh <imp@FreeBSD.org> > AuthorDate: 2023-03-14 21:28:05 +0000 > Commit: Warner Losh <imp@FreeBSD.org> > CommitDate: 2023-03-25 17:06:13 +0000 > > checkstyle9.pl: Perl script to check if a change is approximately style(9) > > This code is adapted from the QEMU checkpatch.pl script. It can check > either a patch, a file or a git branch. It tries to warn about things > that I believe might be style(9) violations. It's experimental, since I > heavily hacked on the qemu version to get it to not complain (much) > about iconic code in the tree. At the moment, it's use should be > considered expermental. It will likely miss violations, and complain > about code that's perfectly fine. It's offered as an experiment > and to make it easier for contributors to submit patches. > --- > tools/build/checkstyle9.pl | 2748 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 2748 insertions(+) > > diff --git a/tools/build/checkstyle9.pl b/tools/build/checkstyle9.pl > new file mode 100755 > index 000000000000..5aec3819bf7c > --- /dev/null > +++ b/tools/build/checkstyle9.pl > @@ -0,0 +1,2748 @@ > +#!/usr/bin/env perl > +# (c) 2001, Dave Jones. (the file handling bit) > +# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit) > +# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite) > +# (c) 2008-2010 Andy Whitcroft <apw@canonical.com> > +# Licensed under the terms of the GNU GPL License version 2 > + > +use strict; > +use warnings; > +use Term::ANSIColor qw(:constants); > + > +my $P = $0; > +$P =~ s@.*/@@g; > + > +our $SrcFile = qr{\.(?:h|c|cpp|s|S|pl|py|sh)$}; > + > +my $V = '0.31'; > + > +use Getopt::Long qw(:config no_auto_abbrev); > + > +my $quiet = 0; > +my $tree = 1; > +my $chk_signoff = 1; > +my $chk_patch = undef; > +my $chk_branch = undef; > +my $tst_only; > +my $emacs = 0; > +my $terse = 0; > +my $file = undef; > +my $color = "auto"; > +my $no_warnings = 0; > +my $summary = 1; > +my $mailback = 0; > +my $summary_file = 0; > +my $root; > +my %debug; > +my $help = 0; > + > +sub help { > + my ($exitcode) = @_; > + > + print << "EOM"; > +Usage: > + > + $P [OPTION]... [FILE]... > + $P [OPTION]... [GIT-REV-LIST] > + > +Version: $V > + > +Options: > + -q, --quiet quiet > + --patch treat FILE as patchfile > + --branch treat args as GIT revision list > + --emacs emacs compile window format > + --terse one line per report > + -f, --file treat FILE as regular source file > + --strict fail if only warnings are found > + --no-summary suppress the per-file summary > + --mailback only produce a report in case of warnings/errors > + --summary-file include the filename in summary > + --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of > + 'values', 'possible', 'type', and 'attr' (default > + is all off) > + --test-only=WORD report only warnings/errors containing WORD > + literally > + --color[=WHEN] Use colors 'always', 'never', or only when output > + is a terminal ('auto'). Default is 'auto'. > + -h, --help, --version display this help and exit > + > +When FILE is - read standard input. > +EOM > + > + exit($exitcode); > +} > + > +# Use at your own risk > +print "\n", MAGENTA, "WARNING:", RESET, " This code is highly experimental ... likely isn't a great style(9) match yet\n\n"; > + > +# Perl's Getopt::Long allows options to take optional arguments after a space. > +# Prevent --color by itself from consuming other arguments > +foreach (@ARGV) { > + if ($_ eq "--color" || $_ eq "-color") { > + $_ = "--color=$color"; > + } > +} > + > +GetOptions( > + 'q|quiet+' => \$quiet, > + 'tree!' => \$tree, > + 'signoff!' => \$chk_signoff, > + 'patch!' => \$chk_patch, > + 'branch!' => \$chk_branch, > + 'emacs!' => \$emacs, > + 'terse!' => \$terse, > + 'f|file!' => \$file, > + 'strict!' => \$no_warnings, > + 'root=s' => \$root, > + 'summary!' => \$summary, > + 'mailback!' => \$mailback, > + 'summary-file!' => \$summary_file, > + > + 'debug=s' => \%debug, > + 'test-only=s' => \$tst_only, > + 'color=s' => \$color, > + 'no-color' => sub { $color = 'never'; }, > + 'h|help' => \$help, > + 'version' => \$help > +) or help(1); > + > +help(0) if ($help); > + > +my $exit = 0; > + > +if ($#ARGV < 0) { > + print "$P: no input files\n"; > + exit(1); > +} > + > +if (!defined $chk_branch && !defined $chk_patch && !defined $file) { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; > + $chk_patch = $chk_branch || $file ? 0 : 1; > +} elsif (!defined $chk_branch && !defined $chk_patch) { > + if ($file) { > + $chk_branch = $chk_patch = 0; > + } else { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $chk_patch = $chk_branch ? 0 : 1; > + } > +} elsif (!defined $chk_branch && !defined $file) { > + if ($chk_patch) { > + $chk_branch = $file = 0; > + } else { > + $chk_branch = $ARGV[0] =~ /.\.\./ ? 1 : 0; > + $file = $chk_branch ? 0 : 1; > + } > +} elsif (!defined $chk_patch && !defined $file) { > + if ($chk_branch) { > + $chk_patch = $file = 0; > + } else { > + $file = $ARGV[0] =~ /$SrcFile/ ? 1 : 0; > + $chk_patch = $file ? 0 : 1; > + } > +} elsif (!defined $chk_branch) { > + $chk_branch = $chk_patch || $file ? 0 : 1; > +} elsif (!defined $chk_patch) { > + $chk_patch = $chk_branch || $file ? 0 : 1; > +} elsif (!defined $file) { > + $file = $chk_patch || $chk_branch ? 0 : 1; > +} > + > +if (($chk_patch && $chk_branch) || > + ($chk_patch && $file) || > + ($chk_branch && $file)) { > + die "Only one of --file, --branch, --patch is permitted\n"; > +} > +if (!$chk_patch && !$chk_branch && !$file) { > + die "One of --file, --branch, --patch is required\n"; > +} > + > +if ($color =~ /^always$/i) { > + $color = 1; > +} elsif ($color =~ /^never$/i) { > + $color = 0; > +} elsif ($color =~ /^auto$/i) { > + $color = (-t STDOUT); > +} else { > + die "Invalid color mode: $color\n"; > +} > + > +my $dbg_values = 0; > +my $dbg_possible = 0; > +my $dbg_type = 0; > +my $dbg_attr = 0; > +my $dbg_adv_dcs = 0; > +my $dbg_adv_checking = 0; > +my $dbg_adv_apw = 0; > +for my $key (keys %debug) { > + ## no critic > + eval "\${dbg_$key} = '$debug{$key}';"; > + die "$@" if ($@); > +} > + > +my $rpt_cleaners = 0; > + > +if ($terse) { > + $emacs = 1; > + $quiet++; > +} > + > +my $emitted_corrupt = 0; > + > +our $Ident = qr{ > + [A-Za-z_][A-Za-z\d_]* > + (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)* > + }x; > +our $Storage = qr{extern|static|asmlinkage}; > +our $Sparse = qr{ > + __force > + }x; > + > +# Notes to $Attribute: > +our $Attribute = qr{ > + const| > + volatile| > + QEMU_NORETURN| > + QEMU_WARN_UNUSED_RESULT| > + QEMU_SENTINEL| > + QEMU_PACKED| > + GCC_FMT_ATTR > + }x; > +our $Modifier; > +our $Inline = qr{inline}; > +our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]}; > +our $Lval = qr{$Ident(?:$Member)*}; > + > +our $Constant = qr{(?:[0-9]+|0x[0-9a-fA-F]+)[UL]*}; > +our $Assignment = qr{(?:\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=)}; > +our $Compare = qr{<=|>=|==|!=|<|>}; > +our $Operators = qr{ > + <=|>=|==|!=| > + =>|->|<<|>>|<|>|!|~| > + &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|% > + }x; > + > +our $NonptrType; > +our $Type; > +our $Declare; > + > +our $NON_ASCII_UTF8 = qr{ > + [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte > + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs > + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte > + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates > + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 > + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 > + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 > +}x; > + > +our $UTF8 = qr{ > + [\x09\x0A\x0D\x20-\x7E] # ASCII > + | $NON_ASCII_UTF8 > +}x; > + > +# some readers default to ISO-8859-1 when showing email source. detect > +# when UTF-8 is incorrectly interpreted as ISO-8859-1 and reencoded back. > +# False positives are possible but very unlikely. > +our $UTF8_MOJIBAKE = qr{ > + \xC3[\x82-\x9F] \xC2[\x80-\xBF] # c2-df 80-bf > + | \xC3\xA0 \xC2[\xA0-\xBF] \xC2[\x80-\xBF] # e0 a0-bf 80-bf > + | \xC3[\xA1-\xAC\xAE\xAF] (?: \xC2[\x80-\xBF]){2} # e1-ec/ee/ef 80-bf 80-bf > + | \xC3\xAD \xC2[\x80-\x9F] \xC2[\x80-\xBF] # ed 80-9f 80-bf > + | \xC3\xB0 \xC2[\x90-\xBF] (?: \xC2[\x80-\xBF]){2} # f0 90-bf 80-bf 80-bf > + | \xC3[\xB1-\xB3] (?: \xC2[\x80-\xBF]){3} # f1-f3 80-bf 80-bf 80-bf > + | \xC3\xB4 \xC2[\x80-\x8F] (?: \xC2[\x80-\xBF]){2} # f4 80-b8 80-bf 80-bf > +}x; > + > +# There are still some false positives, but this catches most > +# common cases. > +our $typeTypedefs = qr{(?x: > + (?![KMGTPE]iB) # IEC binary prefix (do not match) > + [A-Z][A-Z\d_]*[a-z][A-Za-z\d_]* # camelcase > + | [A-Z][A-Z\d_]*AIOCB # all uppercase > + | [A-Z][A-Z\d_]*CPU # all uppercase > + | QEMUBH # all uppercase > +)}; > + > +our @typeList = ( > + qr{void}, > + qr{(?:unsigned\s+)?char}, > + qr{(?:unsigned\s+)?short}, > + qr{(?:unsigned\s+)?int}, > + qr{(?:unsigned\s+)?long}, > + qr{(?:unsigned\s+)?long\s+int}, > + qr{(?:unsigned\s+)?long\s+long}, > + qr{(?:unsigned\s+)?long\s+long\s+int}, > + qr{unsigned}, > + qr{float}, > + qr{double}, > + qr{bool}, > + qr{struct\s+$Ident}, > + qr{union\s+$Ident}, > + qr{enum\s+$Ident}, > + qr{${Ident}_t}, > + qr{${Ident}_handler}, > + qr{${Ident}_handler_fn}, > + qr{target_(?:u)?long}, > + qr{hwaddr}, > +); > + > +# This can be modified by sub possible. Since it can be empty, be careful > +# about regexes that always match, because they can cause infinite loops. > +our @modifierList = ( > +); > + > +sub build_types { > + my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; > + if (@modifierList > 0) { > + my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; > + $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; > + } else { > + $Modifier = qr{(?:$Attribute|$Sparse)}; > + } > + $NonptrType = qr{ > + (?:$Modifier\s+|const\s+)* > + (?: > + (?:typeof|__typeof__)\s*\(\s*\**\s*$Ident\s*\)| > + (?:$typeTypedefs\b)| > + (?:${all}\b) > + ) > + (?:\s+$Modifier|\s+const)* > + }x; > + $Type = qr{ > + $NonptrType > + (?:[\s\*]+\s*const|[\s\*]+|(?:\s*\[\s*\])+)? > + (?:\s+$Inline|\s+$Modifier)* > + }x; > + $Declare = qr{(?:$Storage\s+)?$Type}; > +} > +build_types(); > + > +$chk_signoff = 0 if ($file); > + > +my @rawlines = (); > +my @lines = (); > +my $vname; > +if ($chk_branch) { > + my @patches; > + my %git_commits = (); > + my $HASH; > + open($HASH, "-|", "git", "log", "--reverse", "--no-merges", "--format=%H %s", $ARGV[0]) || > + die "$P: git log --reverse --no-merges --format='%H %s' $ARGV[0] failed - $!\n"; > + > + for my $line (<$HASH>) { > + $line =~ /^([0-9a-fA-F]{40,40}) (.*)$/; > + next if (!defined($1) || !defined($2)); > + my $sha1 = $1; > + my $subject = $2; > + push(@patches, $sha1); > + $git_commits{$sha1} = $subject; > + } > + > + close $HASH; > + > + die "$P: no revisions returned for revlist '$ARGV[0]'\n" > + unless @patches; > + > + my $i = 1; > + my $num_patches = @patches; > + for my $hash (@patches) { > + my $FILE; > + open($FILE, '-|', "git", "show", "--patch-with-stat", $hash) || > + die "$P: git show $hash - $!\n"; > + while (<$FILE>) { > + chomp; > + push(@rawlines, $_); > + } > + close($FILE); > + $vname = substr($hash, 0, 12) . ' (' . $git_commits{$hash} . ')'; > + if ($num_patches > 1 && $quiet == 0) { > + my $prefix = "$i/$num_patches"; > + $prefix = BLUE . BOLD . $prefix . RESET if $color; > + print "$prefix Checking commit $vname\n"; > + $vname = "Patch $i/$num_patches"; > + } else { > + $vname = "Commit " . $vname; > + } > + if (!process($hash)) { > + $exit = 1; > + print "\n" if ($num_patches > 1 && $quiet == 0); > + } > + @rawlines = (); > + @lines = (); > + $i++; > + } > +} else { > + for my $filename (@ARGV) { > + my $FILE; > + if ($file) { > + open($FILE, '-|', "diff -u /dev/null $filename") || > + die "$P: $filename: diff failed - $!\n"; > + } elsif ($filename eq '-') { > + open($FILE, '<&STDIN'); > + } else { > + open($FILE, '<', "$filename") || > + die "$P: $filename: open failed - $!\n"; > + } > + if ($filename eq '-') { > + $vname = 'Your patch'; > + } else { > + $vname = $filename; > + } > + print "Checking $filename...\n" if @ARGV > 1 && $quiet == 0; > + while (<$FILE>) { > + chomp; > + push(@rawlines, $_); > + } > + close($FILE); > + if (!process($filename)) { > + $exit = 1; > + } > + @rawlines = (); > + @lines = (); > + } > +} > + > +exit($exit); > + > +sub top_of_kernel_tree { > + my ($root) = @_; > + > + my @tree_check = ( > + "Makefile.inc1", "README.md", "sys", > + "usr.sbin" > + ); > + > + foreach my $check (@tree_check) { > + if (! -e $root . '/' . $check) { > + return 0; > + } > + } > + return 1; > +} > + > +sub expand_tabs { > + my ($str) = @_; > + > + my $res = ''; > + my $n = 0; > + for my $c (split(//, $str)) { > + if ($c eq "\t") { > + $res .= ' '; > + $n++; > + for (; ($n % 8) != 0; $n++) { > + $res .= ' '; > + } > + next; > + } > + $res .= $c; > + $n++; > + } > + > + return $res; > +} > +sub copy_spacing { > + (my $res = shift) =~ tr/\t/ /c; > + return $res; > +} > + > +sub line_stats { > + my ($line) = @_; > + > + # Drop the diff line leader and expand tabs > + $line =~ s/^.//; > + $line = expand_tabs($line); > + > + # Pick the indent from the front of the line. > + my ($white) = ($line =~ /^(\s*)/); > + > + return (length($line), length($white)); > +} > + > +my $sanitise_quote = ''; > + > +sub sanitise_line_reset { > + my ($in_comment) = @_; > + > + if ($in_comment) { > + $sanitise_quote = '*/'; > + } else { > + $sanitise_quote = ''; > + } > +} > +sub sanitise_line { > + my ($line) = @_; > + > + my $res = ''; > + my $l = ''; > + > + my $qlen = 0; > + my $off = 0; > + my $c; > + > + # Always copy over the diff marker. > + $res = substr($line, 0, 1); > + > + for ($off = 1; $off < length($line); $off++) { > + $c = substr($line, $off, 1); > + > + # Comments we are wacking completely including the begin > + # and end, all to $;. > + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') { > + $sanitise_quote = '*/'; > + > + substr($res, $off, 2, "$;$;"); > + $off++; > + next; > + } > + if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') { > + $sanitise_quote = ''; > + substr($res, $off, 2, "$;$;"); > + $off++; > + next; > + } > + if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') { > + $sanitise_quote = '//'; > + > + substr($res, $off, 2, $sanitise_quote); > + $off++; > + next; > + } > + > + # A \ in a string means ignore the next character. > + if (($sanitise_quote eq "'" || $sanitise_quote eq '"') && > + $c eq "\\") { > + substr($res, $off, 2, 'XX'); > + $off++; > + next; > + } > + # Regular quotes. > + if ($c eq "'" || $c eq '"') { > + if ($sanitise_quote eq '') { > + $sanitise_quote = $c; > + > + substr($res, $off, 1, $c); > + next; > + } elsif ($sanitise_quote eq $c) { > + $sanitise_quote = ''; > + } > + } > + > + #print "c<$c> SQ<$sanitise_quote>\n"; > + if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") { > + substr($res, $off, 1, $;); > + } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") { > + substr($res, $off, 1, $;); > + } elsif ($off != 0 && $sanitise_quote && $c ne "\t") { > + substr($res, $off, 1, 'X'); > + } else { > + substr($res, $off, 1, $c); > + } > + } > + > + if ($sanitise_quote eq '//') { > + $sanitise_quote = ''; > + } > + > + # The pathname on a #include may be surrounded by '<' and '>'. > + if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) { > + my $clean = 'X' x length($1); > + $res =~ s@\<.*\>@<$clean>@; > + > + # The whole of a #error is a string. > + } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) { > + my $clean = 'X' x length($1); > + $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@; > + } > + > + return $res; > +} > + > +sub ctx_statement_block { > + my ($linenr, $remain, $off) = @_; > + my $line = $linenr - 1; > + my $blk = ''; > + my $soff = $off; > + my $coff = $off - 1; > + my $coff_set = 0; > + > + my $loff = 0; > + > + my $type = ''; > + my $level = 0; > + my @stack = (); > + my $p; > + my $c; > + my $len = 0; > + > + my $remainder; > + while (1) { > + @stack = (['', 0]) if ($#stack == -1); > + > + #warn "CSB: blk<$blk> remain<$remain>\n"; > + # If we are about to drop off the end, pull in more > + # context. > + if ($off >= $len) { > + for (; $remain > 0; $line++) { > + last if (!defined $lines[$line]); > + next if ($lines[$line] =~ /^-/); > + $remain--; > + $loff = $len; > + $blk .= $lines[$line] . "\n"; > + $len = length($blk); > + $line++; > + last; > + } > + # Bail if there is no further context. > + #warn "CSB: blk<$blk> off<$off> len<$len>\n"; > + if ($off >= $len) { > + last; > + } > + } > + $p = $c; > + $c = substr($blk, $off, 1); > + $remainder = substr($blk, $off); > + > + #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n"; > + > + # Handle nested #if/#else. > + if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) { > + push(@stack, [ $type, $level ]); > + } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) { > + ($type, $level) = @{$stack[$#stack - 1]}; > + } elsif ($remainder =~ /^#\s*endif\b/) { > + ($type, $level) = @{pop(@stack)}; > + } > + > + # Statement ends at the ';' or a close '}' at the > + # outermost level. > + if ($level == 0 && $c eq ';') { > + last; > + } > + > + # An else is really a conditional as long as its not else if > + if ($level == 0 && $coff_set == 0 && > + (!defined($p) || $p =~ /(?:\s|\}|\+)/) && > + $remainder =~ /^(else)(?:\s|{)/ && > + $remainder !~ /^else\s+if\b/) { > + $coff = $off + length($1) - 1; > + $coff_set = 1; > + #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n"; > + #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n"; > + } > + > + if (($type eq '' || $type eq '(') && $c eq '(') { > + $level++; > + $type = '('; > + } > + if ($type eq '(' && $c eq ')') { > + $level--; > + $type = ($level != 0)? '(' : ''; > + > + if ($level == 0 && $coff < $soff) { > + $coff = $off; > + $coff_set = 1; > + #warn "CSB: mark coff<$coff>\n"; > + } > + } > + if (($type eq '' || $type eq '{') && $c eq '{') { > + $level++; > + $type = '{'; > + } > + if ($type eq '{' && $c eq '}') { > + $level--; > + $type = ($level != 0)? '{' : ''; > + > + if ($level == 0) { > + if (substr($blk, $off + 1, 1) eq ';') { > + $off++; > + } > + last; > + } > + } > + $off++; > + } > + # We are truly at the end, so shuffle to the next line. > + if ($off == $len) { > + $loff = $len + 1; > + $line++; > + $remain--; > + } > + > + my $statement = substr($blk, $soff, $off - $soff + 1); > + my $condition = substr($blk, $soff, $coff - $soff + 1); > + > + #warn "STATEMENT<$statement>\n"; > + #warn "CONDITION<$condition>\n"; > + > + #print "coff<$coff> soff<$off> loff<$loff>\n"; > + > + return ($statement, $condition, > + $line, $remain + 1, $off - $loff + 1, $level); > +} > + > +sub statement_lines { > + my ($stmt) = @_; > + > + # Strip the diff line prefixes and rip blank lines at start and end. > + $stmt =~ s/(^|\n)./$1/g; > + $stmt =~ s/^\s*//; > + $stmt =~ s/\s*$//; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + > + return $#stmt_lines + 2; > +} > + > +sub statement_rawlines { > + my ($stmt) = @_; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + > + return $#stmt_lines + 2; > +} > + > +sub statement_block_size { > + my ($stmt) = @_; > + > + $stmt =~ s/(^|\n)./$1/g; > + $stmt =~ s/^\s*\{//; > + $stmt =~ s/}\s*$//; > + $stmt =~ s/^\s*//; > + $stmt =~ s/\s*$//; > + > + my @stmt_lines = ($stmt =~ /\n/g); > + my @stmt_statements = ($stmt =~ /;/g); > + > + my $stmt_lines = $#stmt_lines + 2; > + my $stmt_statements = $#stmt_statements + 1; > + > + if ($stmt_lines > $stmt_statements) { > + return $stmt_lines; > + } else { > + return $stmt_statements; > + } > +} > + > +sub ctx_statement_full { > + my ($linenr, $remain, $off) = @_; > + my ($statement, $condition, $level); > + > + my (@chunks); > + > + # Grab the first conditional/block pair. > + ($statement, $condition, $linenr, $remain, $off, $level) = > + ctx_statement_block($linenr, $remain, $off); > + #print "F: c<$condition> s<$statement> remain<$remain>\n"; > + push(@chunks, [ $condition, $statement ]); > + if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) { > + return ($level, $linenr, @chunks); > + } > + > + # Pull in the following conditional/block pairs and see if they > + # could continue the statement. > + for (;;) { > + ($statement, $condition, $linenr, $remain, $off, $level) = > + ctx_statement_block($linenr, $remain, $off); > + #print "C: c<$condition> s<$statement> remain<$remain>\n"; > + last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s)); > + #print "C: push\n"; > + push(@chunks, [ $condition, $statement ]); > + } > + > + return ($level, $linenr, @chunks); > +} > + > +sub ctx_block_get { > + my ($linenr, $remain, $outer, $open, $close, $off) = @_; > + my $line; > + my $start = $linenr - 1; > + my $blk = ''; > + my @o; > + my @c; > + my @res = (); > + > + my $level = 0; > + my @stack = ($level); > + for ($line = $start; $remain > 0; $line++) { > + next if ($rawlines[$line] =~ /^-/); > + $remain--; > + > + $blk .= $rawlines[$line]; > + > + # Handle nested #if/#else. > + if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) { > + push(@stack, $level); > + } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) { > + $level = $stack[$#stack - 1]; > + } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) { > + $level = pop(@stack); > + } > + > + foreach my $c (split(//, $lines[$line])) { > + ##print "C<$c>L<$level><$open$close>O<$off>\n"; > + if ($off > 0) { > + $off--; > + next; > + } > + > + if ($c eq $close && $level > 0) { > + $level--; > + last if ($level == 0); > + } elsif ($c eq $open) { > + $level++; > + } > + } > + > + if (!$outer || $level <= 1) { > + push(@res, $rawlines[$line]); > + } > + > + last if ($level == 0); > + } > + > + return ($level, @res); > +} > +sub ctx_block_outer { > + my ($linenr, $remain) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0); > + return @r; > +} > +sub ctx_block { > + my ($linenr, $remain) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0); > + return @r; > +} > +sub ctx_statement { > + my ($linenr, $remain, $off) = @_; > + > + my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off); > + return @r; > +} > +sub ctx_block_level { > + my ($linenr, $remain) = @_; > + > + return ctx_block_get($linenr, $remain, 0, '{', '}', 0); > +} > +sub ctx_statement_level { > + my ($linenr, $remain, $off) = @_; > + > + return ctx_block_get($linenr, $remain, 0, '(', ')', $off); > +} > + > +sub ctx_locate_comment { > + my ($first_line, $end_line) = @_; > + > + # Catch a comment on the end of the line itself. > + my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@); > + return $current_comment if (defined $current_comment); > + > + # Look through the context and try and figure out if there is a > + # comment. > + my $in_comment = 0; > + $current_comment = ''; > + for (my $linenr = $first_line; $linenr < $end_line; $linenr++) { > + my $line = $rawlines[$linenr - 1]; > + #warn " $line\n"; > + if ($linenr == $first_line and $line =~ m@^.\s*\*@) { > + $in_comment = 1; > + } > + if ($line =~ m@/\*@) { > + $in_comment = 1; > + } > + if (!$in_comment && $current_comment ne '') { > + $current_comment = ''; > + } > + $current_comment .= $line . "\n" if ($in_comment); > + if ($line =~ m@\*/@) { > + $in_comment = 0; > + } > + } > + > + chomp($current_comment); > + return($current_comment); > +} > +sub ctx_has_comment { > + my ($first_line, $end_line) = @_; > + my $cmt = ctx_locate_comment($first_line, $end_line); > + > + ##print "LINE: $rawlines[$end_line - 1 ]\n"; > + ##print "CMMT: $cmt\n"; > + > + return ($cmt ne ''); > +} > + > +sub raw_line { > + my ($linenr, $cnt) = @_; > + > + my $offset = $linenr - 1; > + $cnt++; > + > + my $line; > + while ($cnt) { > + $line = $rawlines[$offset++]; > + next if (defined($line) && $line =~ /^-/); > + $cnt--; > + } > + > + return $line; > +} > + > +sub cat_vet { > + my ($vet) = @_; > + my ($res, $coded); > + > + $res = ''; > + while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) { > + $res .= $1; > + if ($2 ne '') { > + $coded = sprintf("^%c", unpack('C', $2) + 64); > + $res .= $coded; > + } > + } > + $res =~ s/$/\$/; > + > + return $res; > +} > + > +my $av_preprocessor = 0; > +my $av_pending; > +my @av_paren_type; > +my $av_pend_colon; > + > +sub annotate_reset { > + $av_preprocessor = 0; > + $av_pending = '_'; > + @av_paren_type = ('E'); > + $av_pend_colon = 'O'; > +} > + > +sub annotate_values { > + my ($stream, $type) = @_; > + > + my $res; > + my $var = '_' x length($stream); > + my $cur = $stream; > + > + print "$stream\n" if ($dbg_values > 1); > + > + while (length($cur)) { > + @av_paren_type = ('E') if ($#av_paren_type < 0); > + print " <" . join('', @av_paren_type) . > + "> <$type> <$av_pending>" if ($dbg_values > 1); > + if ($cur =~ /^(\s+)/o) { > + print "WS($1)\n" if ($dbg_values > 1); > + if ($1 =~ /\n/ && $av_preprocessor) { > + $type = pop(@av_paren_type); > + $av_preprocessor = 0; > + } > + > + } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') { > + print "CAST($1)\n" if ($dbg_values > 1); > + push(@av_paren_type, $type); > + $type = 'C'; > + > + } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) { > + print "DECLARE($1)\n" if ($dbg_values > 1); > + $type = 'T'; > + > + } elsif ($cur =~ /^($Modifier)\s*/) { > + print "MODIFIER($1)\n" if ($dbg_values > 1); > + $type = 'T'; > + > + } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) { > + print "DEFINE($1,$2)\n" if ($dbg_values > 1); > *** 1791 LINES SKIPPED *** >