file isExecutable
Prep for 2.10.10
# check_po.pl  -  check po file translations for likely errors
#
# Written by David W. Pfitzner dwp@mso.anu.edu.au
# This script is hereby placed in the Public Domain.
#
# Various checks on po file translations:
# - printf-style format strings;
# - differences in trailing newlines;
# - empty (non-fuzzy) msgid;
# - likely whitespace errors on joining multi-line entries
# Ignores all fuzzy entries.
#
# Options:
#  -x  Don't do standard checks above (eg, just check one of below).
#  -n  Check newlines within strings; ie, that have equal numbers
#      of newlines in msgstr and msgid.  (Optional because this may
#      legitimately differ.)
#  -w  Check leading whitespace.  Sometimes whitespace is simply
#      spacing (eg, for widget labels etc), or punctuation differences,
#      so a mismatch is not always an error.
#  -W  Check trailing whitespace.  See -w above.
#  -p  Check trailing punctuation.
#  -c  Check capitalization of first non-whitespace character
#      (only when that character is in [a-zA-Z]).
#  -e  Check on empty (c.q. new) msgstr
#
# Reads stdin (or filename args, via <>), writes any problems to stdout.
#
# Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
#
# Version: 0.41 (2002-06-06)
#
# TODO: This script needs to be able to handle Farsi's %Id flag for
# number format specifiers.  More information on how it works, see
# http://www.gnu.org/software/hello/manual/gettext/c_002dformat.html
# It's possible someone has already made this change... look around
# for an updated version of this script.
#
# NOTE(review): the copy of this script that was reviewed had been
# collapsed onto a few physical lines and had lost most of its short
# lines.  The following were reconstructed from the surrounding logic and
# comments and should be confirmed against the upstream script: the sub
# headers and closing braces, option parsing via Getopt::Std, the
# @amsgstr/$msgstr/$is_fuzzy/$is_cformat globals, the -x guards, the
# "*** " problem prefix, the line-join heuristic, the fuzzy-flag handling
# and the plural-form bookkeeping in the main loop.

use strict;
use warnings;
use Getopt::Std;

# Option flags, set by getopts() below (see "Options" in the header).
our ($opt_c, $opt_n, $opt_p, $opt_w, $opt_W, $opt_x, $opt_e);

getopts('cnpwWxe');

# Globals, for current po entry:
#
# Note that msgid and msgstr have newlines represented by the
# two characters '\' and 'n' (and similarly for other escapes).

my @amsgid;        # msgid lines exactly as in input
my @amsgstr;       # msgstr lines exactly as in input
my $entryline;     # lineno where entry starts
my $msgid;         # @amsgid lines joined by ""
my $msgstr;        # @amsgstr lines joined by ""
my $is_fuzzy;      # entry carries a "#, fuzzy" flag
my $is_cformat;    # entry carries a "#, ... c-format" flag
my $state;         # From constant values below.
my $did_print;     # Whether we have printed this entry, to
                   # print only once for multiple problems.

use constant S_LOOKING_START => 0;    # looking for start of entry
use constant S_DOING_MSGID   => 1;    # doing msgid part
use constant S_DOING_MSGSTR  => 2;    # doing msgstr part

# Initialize or reinitialize globals to prepare for new entry.
sub new_entry {
    @amsgid     = ();
    @amsgstr    = ();
    $msgid      = undef;
    $msgstr     = undef;
    $entryline  = 0;
    $is_fuzzy   = 0;
    $is_cformat = 0;
    $did_print  = 0;
    $state      = S_LOOKING_START;
    return;
}

# Nicely print either a "msgid" or "msgstr" (name is one of these)
# with given array of data, one quoted line per array element.
sub print_one {
    my ($name, @lines) = @_;
    print " $name \"", join("\"\n \"", @lines), "\"\n";
    return;
}

# Print a problem (args like print()), preceded by the entry unless
# we have already printed that: label, and msgid and msgstr.
sub print_problem {
    my @message = @_;
    unless ($did_print) {
        print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
        print_one("msgid",  @amsgid);
        print_one("msgstr", @amsgstr);
        $did_print = 1;
    }
    # NOTE(review): the "*** " prefix is reconstructed -- confirm.
    print "*** ", @message;
    return;
}

# Check final newline: probably, translations should end in a newline
# if and only if the original string does.
# (See also check_trailing_whitespace and check_num_newlines below.)
# One of the standard checks, so it is skipped under -x.
sub check_trailing_newlines {
    return if $opt_x;

    # A newline is stored as the two characters '\' 'n', hence the
    # two-character tail comparison.
    my $ichar = (length($msgid)  >= 2) ? substr($msgid,  -2, 2) : "";
    my $schar = (length($msgstr) >= 2) ? substr($msgstr, -2, 2) : "";

    if ($ichar eq "\\n" && $schar ne "\\n") {
        print_problem("Missing trailing newline\n");
    }
    if ($ichar ne "\\n" && $schar eq "\\n") {
        print_problem("Extra trailing newline\n");
    }
    return;
}

# Check leading whitespace.  In general, any leading whitespace should
# be the same in msgstr and msgid -- but not always.  Only run with -w.
sub check_leading_whitespace {
    return unless $opt_w;

    my $id  = ($msgid  =~ m/^(\s+)/) ? $1 : "";
    my $str = ($msgstr =~ m/^(\s+)/) ? $1 : "";

    if ($id ne $str) {
        print_problem("Different leading whitespace\n");
    }
    return;
}

# Check trailing whitespace.  In general, any trailing whitespace should
# be the same in msgstr and msgid -- but not always.  Only run with -W.
# Escaped newlines ('\' 'n') count as whitespace here.
sub check_trailing_whitespace {
    return unless $opt_W;

    my $id  = ($msgid  =~ m/((?:\s|\\n)+)$/) ? $1 : "";
    my $str = ($msgstr =~ m/((?:\s|\\n)+)$/) ? $1 : "";

    if ($id ne $str) {
        print_problem("Different trailing whitespace\n");
    }
    return;
}

# Check equal numbers of newlines.  In general the translation should
# contain as many (escaped) newlines as the original.  Only run with -n.
sub check_num_newlines {
    return unless $opt_n;

    # "() =" forces list context so that we get the number of matches;
    # a plain scalar assignment of m//g would only yield true/false.
    my $num_i = () = $msgid  =~ m(\\n)g;
    my $num_s = () = $msgstr =~ m(\\n)g;

    if ($num_i != $num_s) {
        print_problem("Mismatch in newline count\n");
    }
    return;
}

# Check capitalization of first non-whitespace character (for [a-zA-Z]
# only).  Nothing is reported unless both strings start with a letter.
# Only run with -c.
sub check_leading_capitalization {
    return unless $opt_c;

    my ($id, $str);
    if ($msgid =~ m/^\s*([a-zA-Z])/) {
        $id = $1;
    }
    if ($msgstr =~ m/^\s*([a-zA-Z])/) {
        $str = $1;
    }

    if (defined($id) && defined($str)) {
        if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/)
            || ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
            print_problem("Different leading capitalization\n");
        }
    }
    return;
}

# Check trailing 'punctuation' characters (ignoring trailing whitespace).
# Because the capture group is repeated, only the last punctuation
# character of each string is compared.  Only run with -p.
sub check_trailing_punctuation {
    return unless $opt_p;

    # Might want more characters:
    my $id  = ($msgid  =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) ? $1 : "";
    my $str = ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) ? $1 : "";

    if ($id ne $str) {
        print_problem("Different trailing punctuation\n");
    }
    return;
}

# Check that multiline strings have whitespace separation, since
# otherwise, eg:
#   msgstr "this is a multiline"
#   "string"
# expands to:
#   "this is a multilinestring"
# One of the standard checks, so it is skipped under -x.
sub check_whitespace_joins {
    return if $opt_x;

    my $i = 0;    # 0 while scanning msgid, 1 while scanning msgstr
    foreach my $aref (\@amsgid, \@amsgstr) {
        my $ok   = 1;
        my $prev = undef;
        # NOTE(review): reconstructed heuristic -- a join is suspicious
        # when the previous line is non-empty and neither side of the
        # join is whitespace or an escaped newline.  Confirm.
      LINE:
        foreach my $line (@$aref) {
            if (defined($prev)
                && length($prev)
                && $prev !~ /\s$/
                && $prev !~ /\\n$/
                && $line !~ /^\s/
                && $line !~ /^\\n/) {
                $ok = 0;
                last LINE;
            }
            $prev = $line;
        }
        unless ($ok) {
            print_problem("Possible non-whitespace line-join problem in ",
                          ($i==0 ? "msgid" : "msgstr"), " \n");
        }
        $i++;
    }
    return;
}

# Check printf-style format entries.
# Non-trivial, because translation strings may use format specifiers
# out of order, or skip some specifiers etc.  Also gettext marks
# anything with '%' as cformat, though not all are.
# Only run for entries flagged c-format; a standard check, skipped under -x.
sub check_cformat {
    return unless $is_cformat;
    return if $opt_x;

    my @iform = ($msgid  =~ m/\%[0-9\.\$]*[a-z]/g);
    my @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g);

    ##print join("::", @iform), "\n";
    ##print join("::", @sform), "\n";

  SFORM:
    for my $js (0 .. $#sform) {    # $js: index into sform
        my $sf      = $sform[$js];
        my $sf_orig = $sf;
        my $j;                     # index into iform

        # "%N$..." selects msgid argument N explicitly; strip the
        # position so the remaining specifier can be compared directly.
        if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
            $j = $1 - 1;
        }
        else {
            $j = $js;
        }

        # $j < 0 guards against "%0$..." wrapping around to the last
        # msgid specifier through a negative array index.
        if ($j < 0 || $j > $#iform) {
            print_problem("Format number mismatch for $sf_orig [msgstr:",
                          ($js+1), "]\n");
            next SFORM;
        }

        my $id_spec = $iform[$j];
        if ($sf ne $id_spec) {
            print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
                          " vs $id_spec [msgid:", ($j+1), "]\n");
        }
    }
    return;
}

# Run all individual checks on current entry, reporting any problems.
# Fuzzy entries are skipped entirely; untranslated entries are reported
# only with -e and are never run through the comparison checks.
sub check_msg {
    return if $is_fuzzy;

    $msgid  = join("", @amsgid);
    $msgstr = join("", @amsgstr);

    unless ($opt_x) {
        if (length($msgid)==0) {
            print_problem("Zero length msgid\n");
        }
    }
    if (length($msgstr)==0) {
        unless ($opt_e) { return; }
        print_problem("Untranslated msgid\n");
    }

    check_cformat();
    check_whitespace_joins();
    check_num_newlines();
    check_leading_whitespace();
    check_trailing_newlines();
    check_trailing_whitespace();
    check_leading_capitalization();
    check_trailing_punctuation();
    return;
}

new_entry();

# Main loop: a small state machine over the lines of the po file(s).
# An entry is checked when the blank line that terminates it is seen,
# or at the end of its input file.
LINE:
while (my $line = <>) {
    if ($line =~ m(^\s*$)) {
        if ($state==S_DOING_MSGSTR) {
            check_msg();
            new_entry();
        }
        next LINE;
    }
    if ($line =~ m(^\#, .*fuzzy)) {
        $is_fuzzy = 1;
    }
    if ($line =~ m(^\#, .*c-format)) {
        # .* is because can have fuzzy, c-format
        $is_cformat = 1;
    }
    if ($line =~ m(^\#)) {
        next LINE;
    }
    if ($line =~ m(^msgid \"(.*)\"$)) {
        $entryline = $.;
        @amsgid    = ($1);
        $state     = S_DOING_MSGID;
        next LINE;
    }
    if ($line =~ m(^msgid_plural \"(.*)\"$)) {
        # NOTE(review): reconstructed -- the plural msgid replaces the
        # singular one, so the checks compare plural against plural.
        @amsgid = ($1);
        $state  = S_DOING_MSGID;
        next LINE;
    }
    if ($line =~ m(^msgstr \"(.*)\"$)) {
        @amsgstr = ($1);
        $state   = S_DOING_MSGSTR;
        next LINE;
    }
    if ($line =~ m(^msgstr\[[0-5]\] \"(.*)\"$)) {
        # NOTE(review): reconstructed -- each msgstr[N] replaces the
        # previous one, so only the last plural form gets checked.
        @amsgstr = ($1);
        $state   = S_DOING_MSGSTR;
        next LINE;
    }
    if ($line =~ m(^\"(.*)\"$)) {
        if ($state==S_DOING_MSGID) {
            push @amsgid, $1;
        } elsif($state==S_DOING_MSGSTR) {
            push @amsgstr, $1;
        } else {
            die "Looking at string $line in bad state $state,";
        }
        next LINE;
    }
    die "Unexpected at $.: ", $line;
}
continue {
    # The continue block also runs after "next LINE".  At the end of
    # each input file, flush an entry that was not followed by a blank
    # line, and close ARGV so that $. restarts at 1 for the next file.
    if (eof) {
        if ($state==S_DOING_MSGSTR) {
            check_msg();
        }
        new_entry();
        close ARGV;
    }
}