#!/usr/bin/env perl
#
# Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Hubert Feyrer <hubert@feyrer.de>.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Extract BSD-mandated copyright messages for NetBSD documentation
#
# Usage:
# 1) find src xsrc -type f -print \
#    | perl extract-contrib-string.pl
#    >x
#
# 2) merge text after "--------" in "x" into
#    src/distrib/notes/common/legal.common
#
# Options:
#
#     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
#
# where
#     -d  debug output
#     -h  html output
#     -x  xml/docbook output
#     -?  display help/usage message
#
# How it works:
#   File names are read from standard input, one per line.  Each file is
#   scanned for the start of an advertising clause ($ack_line1), followed
#   by the "display the following acknowledgement" wording ($ack_line2).
#   The lines after that, up to a line matching $ack_endline, form the
#   acknowledgement.  Comment markup is stripped from it, it is split into
#   "This software/product ..." paragraphs, and the paragraphs are
#   de-duplicated in %copyrights before being printed at the end.
#
# The patterns are applied with the /n regex flag (groups do not capture),
# which needs perl 5.22 or newer.

use strict;
use warnings;

# First line of an advertising clause.
my $ack_line1 =
      '([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
    . '|\d\. Redistributions of any form whatsoever)';

# Second line of an advertising clause; the acknowledgement text follows it.
my $ack_line2 =
    '(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';

# Any line that terminates the acknowledgement text.
my $ack_endline =
      '(\d\.\s*(Neither the name'
    .          '|The name of the company nor the name'   # Wasn't my idea
    .          '|The name of the author may not'
    .          '|The name of .* must not be used to endorse'
    .          '|The names? (of )?.* nor the names? of'
    .          '|The names? (of )?.* or any of it\'?s members'
    .          '|Redistributions of any form whatsoever'
    .          '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
    .          "|Urbana-Champaign Independent Media Center's name"
    .          '))'
    . '|(^Neither the name)'
    . '|(THIS SOFTWARE IS PROVIDED)'
    . '|(ALL WARRANTIES WITH REGARD)'
    . '|(The word \'cryptographic\' can be left out if)'
    . '|(may be used to endorse)'
    . '|(@end cartouche)'
    . '|(</para>)'
    . '|(Redistribution and use in source and binary forms)'
    . '|(may not be used to endorse)'
    . '|(\.IP 4)'
    . '|(ALLOW FREE USE OF)'
    . '|(materials provided with the distribution)'
    . '|(@InsertRedistribution@)';

# Files whose clause 3 does not follow the usual two-line layout.  For
# these the $ack_line2 wording alone starts a match and the quoted
# (``...'') text is extracted instead.
my $known_bad_clause_3_wording =
      'usr.bin/lex/.*'                              # UCB
    . '|dist/bind/contrib/nslint-2.1a3/lbl/.*'      #
    . '|usr.sbin/traceroute/ifaddrlist.h'           #
    . '|usr.sbin/traceroute/traceroute.c'           #
    . '|usr.sbin/hilinfo/hilinfo.c'                 # CSS @ Utah
    ;

# warning($file, $text)
#   Print a diagnostic for $file on standard output, tagged with "XXX" and
#   the current input line number ($.) of the most recently read handle.
sub warning {
    my ($file, $text) = @_;
    print "XXX $file line $.: $text\n";
    return;
}

# clean_message($msg, $known_bad)
#   Strip comment markup (roff, C, C++, texinfo, BASIC, m4, sh, docbook)
#   and quoting from the raw acknowledgement text $msg and return the
#   result.  $known_bad is true when the text came from a file listed in
#   $known_bad_clause_3_wording; in that case everything outside the
#   ``...'' quotes is dropped first.
#   The substitutions are order-dependent; keep their sequence.
sub clean_message {
    my ($msg, $known_bad) = @_;

    if ($known_bad) {
        # Drop leading lines until the first line contains the opening ``.
        while ($msg !~ /^.*``.*\n/) {
            last if !$msg;
            # Stop when there is no complete line left to remove, so a
            # message without a newline cannot loop forever.
            last unless $msg =~ s/^.*\n//;
        }
        $msg =~ s/^.*``//;
        $msg =~ s/\n.*``//;
        $msg =~ s/''.*$//;
    }

    # XXX: pcap &c - add to known_bad_clause_3_wording but
    # that code seems to have problems. Easier to add a
    # hack here, shouldn't affect good clause 3.
    $msg =~ s/''\s+Neither the name.*$//;

    # *roff
    1 while $msg =~ s/^\.\\"\s*//;
    1 while $msg =~ s/\n\.\\"\s*/\n/;
    $msg =~ s/\n\.\\"\s*$/\n/g;

    # C++/C99
    1 while $msg =~ s/^\s*\/\/\s*//;
    1 while $msg =~ s/\n\s*\/\/\s*$//;
    $msg =~ s/\n\s*\/\/\s*/\n/g;

    # C
    1 while $msg =~ s/^\s*\*\s*//;
    1 while $msg =~ s/\n\s*\*\s*$//;
    $msg =~ s/\n\s*\*\s*/\n/g;

    # texinfo @c
    1 while $msg =~ s/^\s*\@c\s+//;
    1 while $msg =~ s/\n\s*\@c\s+$//;
    $msg =~ s/\n\s*\@c\s+/\n/g;

    $msg =~ s/^REM\s*//g;               # BASIC?!?
    $msg =~ s/\nREM\s*/\n/g;            # BASIC?!?
    $msg =~ s/^dnl\s*//g;               # m4
    $msg =~ s/\ndnl\s*/\n/g;            # m4
    $msg =~ s/^\s+-\s+//g;              # seen in docbook files
    $msg =~ s/\n\s+-\s+/ /g;            #
    $msg =~ s/^[#\\\|";]+\s*//g;        # sh etc.
    $msg =~ s/\n[#\\\|";]+\s*/\n/g;     # sh etc.

    # NOTE(review): the whitespace inside the next three patterns is
    # written as explicit escapes.  A bare space in place of \r would
    # delete every space in the message and defeat the space-collapsing
    # step at the end.  Presumably these were a literal TAB / CR in the
    # file - confirm against the revision history.
    $msg =~ s/^[ \t*]*//g;              # C
    $msg =~ s/\n[ \t*]*/\n/g;           # C
    $msg =~ s/\@cartouche\n//;          # texinfo
    $msg =~ s/\r//g;                    # carriage returns

    $msg =~ s/\s*\n/\n/g;
    $msg =~ s/^\s*//;
    $msg =~ s/\\\@/\@/g;
    $msg =~ s/\n\n/\n/g;
    $msg =~ s/^\s*``//;
    $msg =~ s/''\s*$//;
    $msg =~ s/^\"//;
    $msg =~ s/\"$//;
    $msg =~ s/\"\.$/./;

    # Fix ISO-646-SE spelling of Lule\(oa
    $msg =~ s/Lule\}/Lule\\(oa/g;

    # Collapse multiple spaces between words. There are a
    # few entries with "by__Name" that affects sorting.
    $msg =~ s/(\w) +(\w)/$1 $2/g;

    return $msg;
}

# ---------------------------------------------------------------------
# Command line.  Every argument is consumed so that the <> below reads
# the list of file names from standard input.
# ---------------------------------------------------------------------
my ($debug, $html, $xml, $usage) = (0, 0, 0, 0);

while (@ARGV) {
    my $arg = shift @ARGV;
    $debug = 1 if $arg =~ /-d/i;
    $html  = 1 if $arg =~ /-h/i;
    $xml   = 1 if $arg =~ /-x/i;
    $usage = 1 if $arg =~ /-\?/;
}

if ($usage) {
    print "usage: find /usr/src -type f -print |\n"
        . " perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n"
        . " where\n"
        . " -h output html\n"
        . " -x output xml/docbook\n"
        . " -d debug\n"
        . " -? display this help message\n";
    exit(0);
}

# In plain (non-html, non-xml) mode each message is also echoed, together
# with the file it came from, while scanning.
my $comments = !$html && !$xml;

# Normalised message text => message as it will be printed.
my %copyrights;

# ---------------------------------------------------------------------
# Scan every file named on standard input.
#
# With -d each input line is echoed with a tag showing the scanner state:
#   0> ordinary line          S> skipped (quoted license in a generator)
#   1> matched $ack_line1     2> matched $ack_line2
#   C> collected into message E> line that ended the message
#   ?> expected $ack_line2 but did not find it
# ---------------------------------------------------------------------
FILE:
while (defined(my $fn = <>)) {
    chomp $fn;

    # Three-argument open: the name comes from outside and must never be
    # interpreted as a mode or a pipe.
    open(my $fh, '<', $fn) or die "cannot read $fn: $!\n";

    my $known_bad = ($fn =~ m,$known_bad_clause_3_wording,) ? 1 : 0;

  LINE:
    while (<$fh>) {
        print "0> $_" if $debug;

        # special case perl script generating a license (openssl's
        # mkerr.pl) - ignore the quoted license, there is another one
        # inside:
        if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
            while (!/$ack_endline/in) {
                print "S> $_" if $debug;
                $_ = readline($fh);
                if (!defined $_) {
                    # Without this check the loop never ends at EOF.
                    warning($fn, "unexpected end of file in quoted license");
                    last LINE;
                }
            }
        }

        next LINE
            unless /$ack_line1/in
                or (/$ack_line2/n and $known_bad);

        print "1> $_" if $debug;

        # Normally the $ack_line2 wording is on the following line; for
        # known-bad files the current line is examined instead.  At end
        # of file an empty line is substituted, which falls through to
        # the "bad clause 3?" diagnostic below.
        unless ($known_bad) {
            $_ = readline($fh) // '';
        }

        unless (/$ack_line2/in or $known_bad) {
            print "?> $_" if $debug;
            warning($fn, "bad clause 3?");
            last LINE;
        }

        print "2> $_" if $debug;

        my $msg = "";
        if ($known_bad and /``/) {
            $msg = $_;
        }
        elsif (/:\s+This product/) {
            # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
            # that is not like others, so special case it here
            $msg = $_;
            $msg =~ s/^.*:\s+(This product.*)$/$1/;
        }

        # Collect lines until one matches $ack_endline.  More than ten
        # collected lines is treated as a runaway match.
        my $cnt = 0;
        $_ = readline($fh);
      COLLECT:
        while (1) {
            if (!defined $_) {
                warning($fn, "unexpected end of file in acknowledgement");
                last LINE;
            }
            last COLLECT if /$ack_endline/in;

            print "C> $_" if $debug;
            $msg .= $_;
            $cnt++;
            $_ = readline($fh);
            if ($cnt > 10) {
                warning($fn, "loooong copyright?");
                last LINE;
            }
        }

        print "E> $_" if $debug;

        # post-process
        $msg = clean_message($msg, $known_bad);

        # Split up into separate paragraphs: mark every "This software"
        # or "This product" with a leading '|' and split on it.
        (my $msgs = $msg) =~ s/(This (software|product))/|$1/g;
        $msgs =~ s,^\|,,;

        for my $part (split(/\|/, $msgs)) {
            1 while $part =~ s/[\n\s]+$//;
            next if $part eq "";

            if ($comments) {
                print ".\\\" File $fn:\n";
                print "$part";
                print "\n\n";
            }

            my $key = lc($part);        # ignore difference in case
            $key =~ s/\n/ /g;           # ignore difference in line breaks
            $key =~ s/\.$//g;           # drop the final dot

            # push organizations ("by the") to the end of the
            # sorting order
            $key =~ s/(developed by) the/$1 ~the/;

            if (defined $copyrights{$key}) {
                # Keep the first spelling seen, unless the stored one
                # lacks the final dot and this one has it.
                if ($copyrights{$key} !~ /\.$/ && $part =~ /\.$/) {
                    # Printed unconditionally (not only with -d).
                    print "already there, without dot - overriding!\n";
                }
                else {
                    next;
                }
            }

            $copyrights{$key} = $part;
        }
    }
    close($fh);
}

# ---------------------------------------------------------------------
# Output, sorted by normalised key.
# NOTE(review): messages are emitted into HTML/XML without escaping
# '&' or '<' - confirm whether the consumers of this output expect that.
# ---------------------------------------------------------------------
if ($html) {
    print "<ul>\n";
    for my $key (sort keys %copyrights) {
        print "<li>$copyrights{$key}</li>\n";
    }
    print "</ul>\n";
}
elsif ($xml) {
    for my $key (sort keys %copyrights) {
        print "<listitem>$copyrights{$key}</listitem>\n";
    }
}
else {
    print "------------------------------------------------------------\n";

    # Messages are separated (not preceded) by ".It" lines.
    print join(".It\n", map { "$copyrights{$_}\n" } sort keys %copyrights);
}