Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  # 
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  # 
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation 
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check 
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the 
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in 
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  # The import of xliff can fail silently in the absence of lxml if another 
  47  # module already tried to import it unsuccessfully, so let's make 100% sure: 
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't 
  55  # cover everything we leave \w instead of specifying the exact letters as 
  56  # this should capture printf types defined in other platforms. 
  57  # extended to support Python named format specifiers 
  58  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  59   
  60  # The name of the XML tag 
  61  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  62   
  63  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  64  #TODO: remove escaped strings once usage is audited 
  65  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  66   
  67  # The whole tag 
  68  tag_re = re.compile("<[^>]+>") 
  69   
  70  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  71   
  72   
def tagname(string):
    """Returns the name of the XML/HTML tag at the start of C{string}.

    @param string: text that begins with a tag (it must match tagname_re,
        i.e. start with "<")
    @return: the tag name captured by tagname_re (may be empty)
    """
    match = tagname_re.match(string)
    # The single capturing group always takes part in a successful match.
    return match.group(1)
76 77
def intuplelist(pair, list):
    """Looks for the triple C{pair} == (a, b, c) in C{list}, treating None
    entries in the list as wildcards.

    Wildcards are only honoured in positions "a" and "c", and "c" is only
    considered once "b" has already matched.

    @param pair: the (a, b, c) triple to look for
    @param list: the patterns to search, each a 3-tuple
    @return: the first matching pattern from list; C{pair} itself when
        nothing matches or when pair is a bare tag name (b and c both None)
    """
    first, second, third = pair
    if (second, third) == (None, None):
        # This is a tagname
        return pair
    acceptable_heads = ((first, second), (None, second))
    acceptable_tails = (None, third)
    for pattern in list:
        head_a, head_b, tail = pattern
        if (head_a, head_b) not in acceptable_heads:
            continue
        if tail in acceptable_tails:
            return pattern
    return pair
92 93
def tagproperties(strings, ignore):
    """Returns all the properties of the XML/HTML tags in C{strings} as
    (tagname, propertyname, propertyvalue) triples, leaving out the
    combinations specified in C{ignore}.

    Every tag also contributes a (tagname, None, None) entry of its own.

    @param strings: the tags to inspect, one tag per string
    @param ignore: (tag, property, value) patterns that may be ignored
    @return: a list of triples
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for name, quoted_value, _double, _single in property_re.findall(string):
            # Strip the quotes:
            candidate = (tag, name, quoted_value[1:-1])
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # NOTE(review): this stops looking at the remaining
                # attributes of this tag as well, not only the ignorable
                # one -- confirm that this is intended.
                break
            properties.append(candidate)
    return properties
116 117
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment.

    The messages are joined with ", " and stored as the single exception
    argument, so C{args[0]} holds the complete text.
    """

    def __init__(self, messages):
        """
        @param messages: one unicode message, or a list of unicode messages
        """
        if not isinstance(messages, list):
            messages = [messages]
        text_type = type(u"")
        # Assumption: all of same type, so only the first one is inspected.
        # An empty list is accepted and yields an empty message.
        assert not messages or isinstance(messages[0], text_type)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Keep the same shape Exception.__init__ produces, so that
            # args[0] is the whole message rather than its first character.
            self.args = (joined,)
131 132
class SeriousFilterFailure(FilterFailure):
    """A FilterFailure for a bad translation that might break an application
    (so the string will be marked fuzzy)."""
# Each entry is a (tag, attribute, value) triple naming an attribute that can
# be changed/ignored when it occurs inside that tag.  The third element, when
# given, is an attribute value that can be ignored if present (like defaults,
# for example).
# None in a position is a wildcard: the entry applies to all tags/values in
# that position.  A non-None "value" means the value of the attribute must be
# taken into account.
common_ignoretags = [
    (None, "xml-lang", None),
]
common_canchangetags = [
    ("img", "alt", None),
    (None, "title", None),
    (None, "dir", None),
    (None, "lang", None),
]
# Actually the title attribute is allowed on many tags in HTML (but probably
# not all)
class CheckerConfig(object):
    """Holds the settings that determine how a checker behaves."""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None,
                 validchars=None, punctuation=None, endpunctuation=None,
                 ignoretags=None, canchangetags=None, criticaltests=None,
                 credit_sources=None):
        """Every parameter is optional; None selects an empty list or the
        default documented next to the attribute below."""
        normalize = data.normalized_unicode

        # List settings: None becomes a new empty list
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)

        # Language objects: self.lang is set by updatetargetlanguage(), the
        # source language is always 'en'
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')

        # Settings that fall back to the target language or module defaults
        self.punctuation = self._init_default(normalize(punctuation),
                                              self.lang.punctuation)
        self.endpunctuation = self._init_default(normalize(endpunctuation),
                                                 self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags,
                                                common_canchangetags)

        # Word lists are kept as dictionaries keyed on the normalized word
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = dict.fromkeys(
            [normalize(word) for word in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys(
            [normalize(word) for word in self._init_list(musttranslatewords)])

        # Maps the ordinal of every valid character to None
        self.validcharsmap = {}
        self.updatevalidchars(normalize(validchars))

    def _init_list(self, list):
        """Prepares a configuration parameter that is a list.

        @type list: List
        @param list: None (a new empty list is used) or the list parameter
        @rtype: List
        """
        return [] if list is None else list

    def _init_default(self, param, default):
        """Prepares a parameter that has a default value.

        @param param: the value supplied by the user
        @param default: the value to use when param is None
        @return: param, or default when param was not specified
        """
        return default if param is None else param

    def update(self, otherconfig):
        """Merges the information in otherconfig into this config object."""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        unseen_markers = [marker for marker in otherconfig.accelmarkers
                          if marker not in self.accelmarkers]
        self.accelmarkers.extend(unseen_markers)
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these settings are replaced by the other config's values)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """Adds validchars to the map that eliminates valid characters.

        @return: True when validchars is None (nothing is changed),
            otherwise None
        """
        if validchars is None:
            return True
        ordinals = [ord(char) for char in data.normalized_unicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(ordinals))

    def updatetargetlanguage(self, langcode):
        """Sets self.lang to the language object for the given language
        code."""
        self.lang = factory.getlanguage(langcode)
236 237
def cache_results(f):
    """Wraps the one-argument method C{f} so that its results are remembered
    in the instance's C{results_cache} dictionary.

    Results are stored under the key (f.__name__, param1).

    @param f: a method taking self and one hashable parameter
    @return: the caching wrapper
    """

    def cached_f(self, param1):
        key = (f.__name__, param1)
        cache = self.results_cache
        try:
            return cache[key]
        except KeyError:
            pass
        # Not cached yet: compute outside the try block so that a KeyError
        # raised by f itself is not swallowed.
        value = f(self, param1)
        cache[key] = value
        return value

    return cached_f
class UnitChecker(object):
    """Base checker: runs the test methods that derived classes define
    against a translation unit."""

    # Maps a test name to the names of the tests that are skipped once that
    # test has failed (see run_filters).
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, errorhandler=None):
        """
        @param checkerconfig: the CheckerConfig to use (a default one is
            created when None)
        @param excludefilters: names of tests that must not be run
        @param limitfilters: when given, only these tests are considered
        @param errorhandler: called as errorhandler(testname, source, target,
            exception) when a test raises an unexpected exception
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            checkerconfig = CheckerConfig()
        self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests.
        self.helperfunctions = {}
        for name in dir(UnitChecker):
            attribute = getattr(self, name)
            if callable(attribute):
                self.helperfunctions[name] = attribute
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)
        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Returns a dictionary (name -> bound method) of the available
        filters, including/excluding those in the given lists."""
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        filters = {}
        for name in limitfilters:
            if (name in excludefilters or name in self.helperfunctions
                    or name == "errorhandler"):
                continue
            candidate = getattr(self, name, None)
            if callable(candidate):
                filters[name] = candidate
        return filters

    def setconfig(self, config):
        """Stores config and builds the accelerator and variable prefilters
        from its accelmarkers and varmatches."""
        self.config = config
        self.accfilters = []
        for accelmarker in self.config.accelmarkers:
            self.accfilters.append(prefilters.filteraccelerators(accelmarker))
        self.varfilters = []
        for startmatch, endmatch in self.config.varmatches:
            self.varfilters.append(
                prefilters.filtervariables(startmatch, endmatch,
                                           prefilters.varname))
        self.removevarfilter = []
        for startmatch, endmatch in self.config.varmatches:
            self.removevarfilter.append(
                prefilters.filtervariables(startmatch, endmatch,
                                           prefilters.varnone))

    def setsuggestionstore(self, store):
        """Sets the store that a checker should use for evaluating
        suggestions, asking a non-empty store for its index."""
        self.suggestion_store = store
        if store:
            store.require_index()

    @cache_results
    def filtervariables(self, str1):
        """Passes str1 through the variable filters (self.varfilters)."""
        return helpers.multifilter(str1, self.varfilters)

    @cache_results
    def removevariables(self, str1):
        """Passes str1 through the variable removal filters
        (self.removevarfilter)."""
        return helpers.multifilter(str1, self.removevarfilter)

    @cache_results
    def filteraccelerators(self, str1):
        """Passes str1 through the accelerator filters, with no accept
        list."""
        return helpers.multifilter(str1, self.accfilters, None)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """Passes str1 through the accelerator filters, handing acceptlist on
        to them.  The result is not cached."""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    @cache_results
    def filterwordswithpunctuation(self, str1):
        """Replaces words with punctuation with their unpunctuated
        equivalents."""
        return prefilters.filterwordswithpunctuation(str1)

    @cache_results
    def filterxml(self, str1):
        """Removes everything matching tag_re, so only the text remains."""
        return tag_re.sub("", str1)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal
        operation."""
        return test(unit)

    def run_filters(self, unit):
        """Runs all the tests in this suite on unit.

        Tests named in self.preconditions run first; when one of them fails
        the tests it lists are skipped.  Tests listed in the target
        language's ignoretests are skipped altogether.

        @return: a dictionary mapping the name of each failed test to its
            message (the test's docstring, or the FilterFailure text)
        """
        self.results_cache = {}
        failures = {}
        ignores = self.config.lang.ignoretests[:]
        ordered_names = list(self.preconditions.keys())
        ordered_names.extend([name for name in self.defaultfilters.keys()
                              if name not in self.preconditions])
        for functionname in ordered_names:
            if functionname in ignores:
                continue
            # this filterfunction may only be defined on another checker if
            # using TeeChecker
            filterfunction = getattr(self, functionname, None)
            if filterfunction is None:
                continue
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure as e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception as e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" %
                                     (functionname, unit.source, unit.target, e))
                filterresult = self.errorhandler(functionname, unit.source,
                                                 unit.target, e)
            if filterresult:
                continue
            # we test some preconditions that aren't actually a cause for
            # failure
            if functionname in self.defaultfilters:
                failures[functionname] = filtermessage
            if functionname in self.preconditions:
                ignores.extend(self.preconditions[functionname])
        self.results_cache = {}
        return failures
388 389
class TranslationChecker(UnitChecker):
    """A checker that passes source and target strings to the checks, not the
    whole unit.

    This provides some speedup and simplifies testing."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, errorhandler=None):
        """Takes the same parameters as UnitChecker and passes them on
        unchanged."""
        super(TranslationChecker, self).__init__(checkerconfig, excludefilters,
                                                 limitfilters, errorhandler)

    def run_test(self, test, unit):
        """Runs the given test on the strings cached by run_filters().

        For a plural unit the test is run on every string in
        unit.target.strings, each time against the cached source string.

        Note that this can raise a FilterFailure as part of normal operation.

        @return: the result of the test; for plural units False as soon as
            any plural form fails without giving a message
        @raise FilterFailure: with the messages of all the plural forms that
            failed with a message
        """
        if not self.hasplural:
            return test(self.str1, self.str2)
        filtermessages = []
        filterresult = True
        for pluralform in unit.target.strings:
            try:
                if not test(self.str1, unicode(pluralform)):
                    filterresult = False
            except FilterFailure as e:
                filterresult = False
                # args[0] is the message text, as used by
                # UnitChecker.run_filters; converting the whole args tuple
                # would put its repr in the message.
                filtermessages.append(e.args[0])
        if not filterresult and filtermessages:
            raise FilterFailure(filtermessages)
        return filterresult

    def run_filters(self, unit):
        """Do some optimisation by caching some data of the unit for the
        benefit of run_test(), then run the tests as UnitChecker does."""
        self.str1 = data.normalized_unicode(unit.source) or u""
        self.str2 = data.normalized_unicode(unit.target) or u""
        self.hasplural = unit.hasplural()
        self.locations = unit.getlocations()
        return super(TranslationChecker, self).run_filters(unit)
430 431
class TeeChecker:
    """A Checker that drives several other checkers and combines their
    results."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, checkerclasses=None, errorhandler=None,
                 languagecode=None):
        """Constructs a TeeChecker from the given checker classes.

        @param checkerclasses: the classes to instantiate ([StandardChecker]
            when None); each is created with the checkerconfig,
            excludefilters, limitfilters and errorhandler given here
        @param languagecode: when given, every checker's config is switched
            to this target language and the language's own checker (if any)
            is added to the list
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        created = []
        for checkerclass in checkerclasses:
            created.append(checkerclass(checkerconfig=checkerconfig,
                                        excludefilters=excludefilters,
                                        limitfilters=limitfilters,
                                        errorhandler=errorhandler))
        self.checkers = created
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Returns the dictionary of filters available on all the checkers
        together, including/excluding those in the given lists.

        The result is also stored in self.combinedfilters.  A warning is
        written to stderr for every name in limitfilters that no checker
        provides.
        """
        if excludefilters is None:
            excludefilters = {}
        combined = {}
        for checker in self.checkers:
            combined.update(checker.getfilters(excludefilters, limitfilters))
        self.combinedfilters = combined
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in self.combinedfilters:
                    continue
                import sys
                sys.stderr.write("warning: could not find filter %s" % filtername + "\n")
        return self.combinedfilters

    def run_filters(self, unit):
        """Runs the tests of every checker on unit and returns the merged
        dictionary of failures."""
        failures = {}
        for checker in self.checkers:
            checker_failures = checker.run_filters(unit)
            failures.update(checker_failures)
        return failures

    def setsuggestionstore(self, store):
        """Hands the store that should be used for evaluating suggestions to
        every checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
483 484
485 -class StandardChecker(TranslationChecker):
486 """The basic test suite for source -> target translations.""" 487
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # KDE comments are removed from the target first; the check fails when
    # the source has text but nothing is left of the target.
    target = prefilters.removekdecomments(str2)
    return len(str1.strip()) == 0 or len(target) != 0
492
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original
        string"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source = self.filteraccelerators(self.removevariables(str1)).strip()
    target = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(source) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    nothing_to_uppercase = source.isupper() or source.upper() == source
    if nothing_to_uppercase and source == target:
        return True
    notranslate = self.config.notranslatewords
    if notranslate:
        words = source.split()
        # why do we only test for one notranslate word?
        if len(words) == 1 and words[0] in notranslate:
            return True
    # we could also check for things like source.isnumeric(), but the test
    # above (source.upper() == source) makes this unnecessary
    if source.lower() == target.lower():
        raise FilterFailure(u"please translate")
    return True
517
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source_has_text = len(str1.strip()) > 0
    # An empty target is not "blank"; that is left to other checks.
    target_is_whitespace = len(str2) != 0 and len(str2.strip()) == 0
    return not (source_has_text and target_is_whitespace)
523
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original
        string"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source_length = len(str1.strip())
    target_length = len(str2.strip())
    # Fails when the target is less than a tenth of the source length ...
    if source_length > 0 and 0 < target_length < source_length * 0.1:
        return False
    # ... or when a longer source was translated with a single character.
    if source_length > 1 and target_length == 1:
        return False
    return True
530
def long(self, str1, str2):
    """checks whether a translation is much longer than the original
        string"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source_length = len(str1.strip())
    target_length = len(str2.strip())
    # Fails when the source is less than a tenth of the target length ...
    if source_length > 0 and 0 < source_length < target_length * 0.1:
        return False
    # ... or when a single character was translated with something longer.
    if source_length == 1 and target_length > 1:
        return False
    return True
537
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True

    def quoted_escapes(text):
        # The words of text that contain a backslash, quoted and joined
        return u", ".join([u"'%s'" % word for word in text.split() if u"\\" in word])

    escapes1 = quoted_escapes(str1)
    escapes2 = quoted_escapes(str2)
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
546
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    line_endings = (u"\n", u"\r")
    if helpers.countsmatch(str1, str2, line_endings):
        return True
    raise FilterFailure(u"line endings in original don't match line endings in translation")
553
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    tabs_match = helpers.countmatch(str1, str2, "\t")
    if tabs_match:
        return True
    raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
560
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.

    def prepare(text):
        # variables, accelerators and words with punctuation are filtered
        # before the quotes are counted
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        return self.filterwordswithpunctuation(text)

    # Only the source has its punctuation translated for the target language.
    source = self.config.lang.punctranslate(prepare(str1))
    target = prepare(str2)
    return helpers.countsmatch(source, target, (u"'", u"''", u"\\'"))
567
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.

    def prepare(text):
        # variables, accelerators and XML tags are filtered before the
        # quotes are counted
        text = self.filteraccelerators(self.filtervariables(text))
        return self.filterxml(text)

    quotes = (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”")
    # Only the source has its punctuation translated for the target language.
    source = self.config.lang.punctranslate(prepare(str1))
    target = prepare(str2)
    return helpers.countsmatch(source, target, quotes)
577
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    # Count occurrences of two consecutive spaces.  Counting a single space
    # would compare the number of all spaces, which legitimately differs
    # between a source string and its translation.
    return helpers.countmatch(str1, str2, u"  ")
583
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    nbsp, space = u"\u00a0", u" "
    # Convert all nbsp to space, and just check spaces. Useful intermediate
    # step to stricter nbsp checking?
    source = self.filteraccelerators(self.filtervariables(str1))
    source = self.config.lang.punctranslate(source).replace(nbsp, space)
    if space not in source:
        return True
    target = self.filteraccelerators(self.filtervariables(str2))
    target = target.replace(nbsp, space)
    for puncchar in self.config.punctuation:
        occurrences = source.count(puncchar)
        # Only punctuation that occurs equally often in both is compared.
        if not occurrences or occurrences != target.count(puncchar):
            continue
        spaced_source = source.count(puncchar + space)
        spaced_target = target.count(puncchar + space)
        if spaced_source == spaced_target:
            continue
        # handle extra spaces that are because of transposed punctuation
        transposed = source.endswith(puncchar) != target.endswith(puncchar)
        if transposed and abs(spaced_source - spaced_target) == 1:
            continue
        return False
    return True
608
def printf(self, str1, str2):
    """checks whether printf format strings match"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    #
    # Returns 1 when the format specifiers of the translation (str2) are
    # compatible with those of the original (str1), and 0 otherwise.
    #
    # count1/count2 hold the number of specifiers seen in str1/str2.  They
    # stay None when the string has no specifiers (count1 also when str1 was
    # never scanned because str2 has none).
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = var_num2 + 1
        str2key = match2.group('key')
        if match2.group('ord'):
            # Positional specifier such as %1$s: compare it with the
            # specifier at that (1-based) position in the original.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if int(match2.group('ord')) == var_num1 + 1:
                    if match2.group('fullvar') != match1.group('fullvar'):
                        return 0
        elif str2key:
            # Named specifier such as %(name)s: the same name must occur in
            # the original with the same format.
            str1key = None
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if match1.group('key') and str2key == match1.group('key'):
                    str1key = match1.group('key')
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if match1.group('fullvar') != match2.group('fullvar'):
                        return 0
            if str1key == None:
                # the name does not occur in the original at all
                return 0
        else:
            # Plain specifier: compare with the specifier at the same index
            # in the original.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                # '%.0s' "placeholder" in plural will match anything
                if plural and match2.group('fullvar') == '.0s':
                    continue
                if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                    return 0

    if count2 is None:
        # No specifiers in the translation: the original must have none
        # either.
        if list(printf_pat.finditer(str1)):
            return 0

    # Both strings must contain the same number of specifiers.
    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
653
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    messages = []
    for accelmarker in self.config.accelmarkers:
        # Accelerators are counted with the valid accelerator characters of
        # the source language for str1 and of the target language for str2.
        counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
        counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
        count1, countbad1 = counter1(source)
        count2, countbad2 = counter2(target)
        getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
        accel2, bad2 = getaccel(target)
        if count1 == count2:
            continue
        if count1 == 1 and count2 == 0:
            if countbad2 == 1:
                message = u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])
            else:
                message = u"accelerator %s is missing from translation" % accelmarker
        elif count1 == 0:
            message = u"accelerator %s does not occur in original and should not be in translation" % accelmarker
        elif count1 == 1 and count2 > count1:
            message = u"accelerator %s is repeated in translation" % accelmarker
        else:
            message = u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)
        messages.append(message)
    if not messages:
        return True
    # The failure is serious when this test is configured as critical.
    if "accelerators" in self.config.criticaltests:
        raise SeriousFilterFailure(messages)
    raise FilterFailure(messages)
685 686 # def acceleratedvariables(self, str1, str2): 687 # """checks that no variables are accelerated""" 688 # messages = [] 689 # for accelerator in self.config.accelmarkers: 690 # for variablestart, variableend in self.config.varmatches: 691 # error = accelerator + variablestart 692 # if str1.find(error) >= 0: 693 # messages.append(u"original has an accelerated variable") 694 # if str2.find(error) >= 0: 695 # messages.append(u"translation has an accelerated variable") 696 # if messages: 697 # raise FilterFailure(messages) 698 # return True 699
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the
        two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    mismatch1, mismatch2 = [], []
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # Choose how a bare variable name gets its markers back for the
        # message: an integer end marker is not text, so it is not appended.
        if startmarker and endmarker and not isinstance(endmarker, int):
            def redecorate(var, start=startmarker, end=endmarker):
                return start + var + end
        elif startmarker:
            def redecorate(var, start=startmarker):
                return start + var
        else:
            def redecorate(var):
                return var
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 == vars2:
            continue
        # we use counts to compare so we can handle multiple variables
        surplus1 = [var for var in vars1 if vars1.count(var) > vars2.count(var)]
        surplus2 = [var for var in vars2 if vars1.count(var) < vars2.count(var)]
        # filter variable names we've already seen, so they aren't
        # matched by more than one filter...
        fresh1 = [var for var in surplus1 if var not in varnames1]
        fresh2 = [var for var in surplus2 if var not in varnames2]
        varnames1.extend(fresh1)
        varnames2.extend(fresh2)
        mismatch1.extend([redecorate(var) for var in fresh1])
        mismatch2.extend([redecorate(var) for var in fresh2])
    # Variables missing from the translation are the serious case; extra
    # variables are only reported when nothing is missing.
    if mismatch1:
        raise SeriousFilterFailure([u"do not translate: %s" % u", ".join(mismatch1)])
    if mismatch2:
        raise FilterFailure([u"translation contains variables not in original: %s" % u", ".join(mismatch2)])
    return True
740
def functions(self, str1, str2):
    """checks that function names are not translated"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # The configured punctuation is handed on to decoration.getfunctions.
    punctuation = self.config.punctuation
    return helpers.funcmatch(str1, str2, decoration.getfunctions, punctuation)
744
def emails(self, str1, str2):
    """checks that emails are not translated"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    extract = decoration.getemails
    return helpers.funcmatch(str1, str2, extract)
748
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    extract = decoration.geturls
    return helpers.funcmatch(str1, str2, extract)
752
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the
        two strings"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # Only the numbers found in the source are counted in both strings.
    source_numbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, source_numbers)
757
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    extract = decoration.spacestart
    return helpers.funcmatch(str1, str2, extract)
761
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # The source has its punctuation translated for the target language
    # before the comparison.
    source = self.config.lang.punctranslate(str1)
    return helpers.funcmatch(source, str2, decoration.spaceend)
766
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.

    def prepare(text):
        # words with punctuation, variables, accelerators and XML tags are
        # filtered, in that order
        for step in (self.filterwordswithpunctuation, self.filtervariables,
                     self.filteraccelerators, self.filterxml):
            text = step(text)
        return text

    # Only the source has its punctuation translated for the target language.
    source = self.config.lang.punctranslate(prepare(str1))
    target = prepare(str2)
    return helpers.funcmatch(source, target, decoration.puncstart, self.config.punctuation)
773
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source = self.config.lang.punctranslate(self.filtervariables(str1))
    target = self.filtervariables(str2)
    # Trailing whitespace is ignored, and ":" is treated as end punctuation
    # in addition to the configured characters.
    endings = self.config.endpunctuation + u":"
    return helpers.funcmatch(source.rstrip(), target.rstrip(), decoration.puncend, endings)
782
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed"""
    # The docstring doubles as the failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # this test is a subset of startandend
    if not decoration.ispurepunctuation(str1):
        # The translation may then not be pure punctuation either.
        return not decoration.ispurepunctuation(str2)
    return str1 == str2
790
def brackets(self, str1, str2):
    """Check that the number of brackets in both strings match.

    Variables are filtered from both strings first. Raises FilterFailure
    listing the brackets that are missing from, or extra in, the translation;
    returns True when every bracket count agrees.
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing, extra = [], []
    for bracket in u"[]{}()":
        difference = target.count(bracket) - source.count(bracket)
        if difference < 0:
            missing.append(u"'%s'" % bracket)
        elif difference > 0:
            extra.append(u"'%s'" % bracket)
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
812
def sentencecount(self, str1, str2):
    """Check that the number of sentences in both strings match.

    Accelerators are filtered out first. The source is split with the source
    language's sentences() and the target with the target language's.
    Raises FilterFailure when the two counts differ.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_total = len(self.config.sourcelang.sentences(source))
    target_total = len(self.config.lang.sentences(target))
    if source_total != target_total:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_total, target_total))
    return True
822
def options(self, str1, str2):
    """Check that options are not translated.

    An option is a whitespace-delimited word of the (variable-filtered)
    source that starts with "--", is not just "--" and ends in an
    alphanumeric character. The part before "=" must occur in the target;
    the part after "=" (if any) must not. Raises FilterFailure otherwise.
    """
    for candidate in self.filtervariables(str1).split():
        looks_like_option = (candidate != u"--"
                             and candidate.startswith(u"--")
                             and candidate[-1].isalnum())
        if not looks_like_option:
            continue
        pieces = candidate.split(u"=")
        option = pieces[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(pieces) > 1 and pieces[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": pieces[1], "option": option})
    return True
834
def startcaps(self, str1, str2):
    """Check that the message starts with the correct capitalisation.

    Accelerators are filtered out first. The capsstart() results of the
    source and target languages are only compared when both strings are
    longer than one character.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if len(source) > 1 and len(target) > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    # Short strings: pass unless exactly one of the two is empty.
    return (len(source) == 0) == (len(target) == 0)
846
def simplecaps(self, str1, str2):
    """Check that the capitalisation of the two strings isn't wildly different.

    Variables are removed from both strings, and the pronoun "I" in the
    source is lowered so that it does not count as a capital. The number of
    capitals in both strings is then compared with a set of heuristics.

    Returns True when the capitalisation style looks similar, else False.
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    # The character in front of " I " is captured and put back, so only the
    # pronoun is lowered and no neighbouring character is lost.
    str1 = re.sub(u"([^%s]) I " % self.config.sourcelang.sentenceend, u"\\1 i ", str1)
    capitals1 = helpers.filtercount(str1, unicode.isupper)
    capitals2 = helpers.filtercount(str2, unicode.isupper)
    alpha1 = helpers.filtercount(str1, unicode.isalpha)
    alpha2 = helpers.filtercount(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same (floor division keeps the thresholds integral)
    if capitals1 == 0 or capitals1 == 1:
        return capitals2 == capitals1
    elif capitals1 < len(str1) // 10:
        return capitals2 <= len(str2) // 8
    elif len(str1) < 10:
        return abs(capitals1 - capitals2) < 3
    elif capitals1 > len(str1) * 6 // 10:
        return capitals2 > len(str2) * 6 // 10
    else:
        return abs(capitals1 - capitals2) < (len(str1) + len(str2)) // 6
873
def acronyms(self, str1, str2):
    """Check that acronyms that appear are unchanged.

    A word of the source counts as an acronym when it is upper case and
    longer than one character. Variables found in the source and the keys of
    config.musttranslatewords are exempt. Raises FilterFailure listing the
    acronyms that do not occur in the target.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed.extend(decoration.getvariables(startmatch, endmatch)(str1))
    allowed.extend(self.config.musttranslatewords.keys())
    source = self.filteraccelerators(self.filtervariables(str1))
    words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    # TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    # TODO: consider limiting the word length for recognising acronyms to
    # something like 5/6 characters
    changed = [word for word in words
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
895
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    XML, variables and accelerators are filtered out of the target, full
    stops are dropped and the text is lower-cased before adjacent words are
    compared. Words in the language's validdoublewords may repeat. Raises
    FilterFailure on the first other repeated word.
    """
    previous = ""
    # NOTE(review): splitting on "\n" and joining with "\n" again leaves the
    # text as it was; joining with a space may have been intended - confirm.
    text = "\n".join(str2.split("\n"))
    text = self.filterxml(text)
    text = self.removevariables(text)
    text = self.filteraccelerators(text)
    for current in text.replace(u".", u"").lower().split():
        if current == previous and current not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % current)
        previous = current
    return True
906
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable appear in the
    translation too.

    Passes immediately when no such words are configured. Otherwise
    variables are filtered, every punctuation character is replaced by a
    space and accelerators are filtered before the words are compared.
    Raises FilterFailure listing the configured words that are in the source
    but not in the target.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # The punctuation is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = []
    for word in source_words:
        if word in protected and word not in target_words:
            translated.append(word)
    if translated:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(translated)))
    return True
925
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't appear
    in the translation.

    Passes immediately when no such words are configured. Otherwise
    variables are removed, every punctuation character is replaced by a
    space and accelerators are filtered before the words are compared.
    Raises FilterFailure listing the configured words that are in both the
    source and the target.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The punctuation is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = []
    for word in source_words:
        if word in required and word in target_words:
            untranslated.append(word)
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
944
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the
    translation.

    Passes immediately when config.validcharsmap is empty. Both strings are
    translated with that map; any character left over in the target that is
    not also left over in the source is reported through FilterFailure.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftovers = str1.translate(charmap)
    target_leftovers = str2.translate(charmap)
    offenders = []
    for char in target_leftovers:
        if char not in source_leftovers:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
956
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-delimited word of the accelerator-filtered source that
    starts with "/" is treated as a path; helpers.countsmatch must hold for
    it on the two strings as passed in.
    """
    for word in self.filteraccelerators(str1).split():
        if word.startswith(u"/") and not helpers.countsmatch(str1, str2, (word,)):
            return False
    return True
964
def xmltags(self, str1, str2):
    """Check that XML/HTML tags have not been translated.

    Tags are located with tag_re. Their properties (minus config.ignoretags)
    are run through intuplelist with config.canchangetags, and the resulting
    lists of source and target must be equal.
    """
    source_tags = tag_re.findall(str1)
    if not source_tags:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied.
        return len(tag_re.findall(str2)) == 0

    first_tag = source_tags[0]
    # The source consists of one single tag that carries no "=": accept.
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True

    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    changeable = self.config.canchangetags
    source_filtered = [intuplelist(item, changeable) for item in source_properties]
    target_filtered = [intuplelist(item, changeable) for item in target_properties]

    # TODO: consider the consequences of different ordering of
    # attributes/tags
    return source_filtered == target_filtered
992
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    Fails when the target starts with "_:" or contains "_:" right after a
    newline.
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
997
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Passes when the conflict marker does not occur in the target.
    """
    marker = u"#-#-#-#-#"
    return marker not in str2
1001
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts the occurrences of "(s)" in both strings. When the target
    language has a single plural form (nplurals == 1) the target must not
    contain any; otherwise the two counts must be equal.
    """

    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the regular
    # expressions must not go through string escape processing.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
1018
def spellcheck(self, str1, str2):
    """Check for words that don't pass a spell check.

    Passes immediately when no target language is configured or when
    spelling is not available. Words of the target are not reported when
    they are in config.notranslatewords, were also flagged in the source, or
    are among their own suggestions. Raises FilterFailure with one message
    per remaining word.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    # Words flagged in the source (checked with lang="en") are not reported.
    flagged_in_source = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        if word in self.config.notranslatewords:
            continue
        if word in flagged_in_source:
            continue
        # hack to ignore hyphenisation rules
        if word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
1044
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of normal
    translations.

    Fails when the source string is one of config.credit_sources.
    """
    return str1 not in self.config.credit_sources
# Maps a precondition test to the tests that are ignored when that
# precondition is run and fails.
# NOTE(review): this sits between the checker methods and the module-level
# configs, so it is presumably a class attribute of the checker class -
# confirm the indentation against the original file.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The "startcaps" entry below is disabled: it has been
                 # causing problems since Python 2.6, as startcaps is now
                 # seen as an important test to always execute and could be
                 # run before it is blocked by a failing "untranslated" or
                 # "blank" test. This is probably due to a slightly
                 # different implementation of the internal dict handling
                 # since Python 2.6. We should never have relied on this
                 # ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace": ("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf")}

# code to actually run the tests (use unittest?)

# Settings that OpenOfficeChecker merges into its checker configuration.
# Each varmatches entry is a (startmatch, endmatch) pair.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
                ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
                ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags=[("alt", "xml-lang", None), ("ahelp", "visibility", "visible"),
                ("img", "width", None), ("img", "height", None)],
    canchangetags=[("link", "name", None)],
    )

class OpenOfficeChecker(StandardChecker):
    """StandardChecker that merges openofficeconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        openofficeconfig before StandardChecker.__init__ is called.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that MozillaChecker merges into its checker configuration.
# Each varmatches entry is a (startmatch, endmatch) pair.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
                ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests=["accelerators"],
    )

class MozillaChecker(StandardChecker):
    """StandardChecker that merges mozillaconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        mozillaconfig before StandardChecker.__init__ is called.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """Check for messages containing translation credits instead of
        normal translations.

        Fails when any of self.locations is one of the known credit
        locations.
        """
        credit_locations = ('MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation')
        return not any(location in credit_locations for location in self.locations)

# Settings that DrupalChecker merges into its checker configuration.
# Each varmatches entry is a (startmatch, endmatch) pair.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
    )

class DrupalChecker(StandardChecker):
    """StandardChecker that merges drupalconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        drupalconfig before StandardChecker.__init__ is called.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that GnomeChecker merges into its checker configuration.
# Each varmatches entry is a (startmatch, endmatch) pair.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
    )

class GnomeChecker(StandardChecker):
    """StandardChecker that merges gnomeconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        gnomeconfig before StandardChecker.__init__ is called.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Check whether any gconf config settings have been translated.

        Only applies to locations that contain 'schemas.in'. Every match of
        gconf_attribute_re in the source, minus its first and last character,
        must occur in the target; otherwise FilterFailure is raised.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True

# Settings that KdeChecker merges into its checker configuration.
# Each varmatches entry is a (startmatch, endmatch) pair.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
    )

class KdeChecker(StandardChecker):
    """StandardChecker that merges kdeconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        kdeconfig before StandardChecker.__init__ is called.
        """
        # TODO allow setup of KDE plural and translator comments so that they
        # do not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that CCLicenseChecker merges into its checker configuration.
# The single varmatches entry is a (startmatch, endmatch) pair.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])

class CCLicenseChecker(StandardChecker):
    """StandardChecker that merges cclicenseconfig into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker.

        A "checkerconfig" keyword argument that is missing or None is
        replaced by a fresh CheckerConfig. The configuration is updated with
        cclicenseconfig before StandardChecker.__init__ is called.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)

# Lookup table from a project name to the checker class for that project.
# "wx" shares KdeChecker with "kde".
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }


class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy (fails when it has)."""
        marked = unit.isfuzzy()
        return not marked

    def isreview(self, unit):
        """Check if the unit has been marked review (fails when it has)."""
        marked = unit.isreview()
        return not marked

    def nplurals(self, unit):
        """Check for the correct number of noun forms for plural
        translations.

        Units without plurals always pass, as do all units when the target
        language has no valid (positive) nplurals value.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if not expected > 0:
            return True
        return len(unit.target.strings) == expected

    def hassuggestion(self, unit):
        """Check if there is at least one suggested translation for this
        unit (fails when there is).

        Suggestions come from self.suggestion_store when one is set, else
        from the alternative translations of an XLIFF unit.
        """
        store = getattr(self, 'suggestion_store', None)
        self.suggestion_store = store
        if store:
            found = store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            found = unit.getalttrans()
        else:
            found = []
        return not found
1262 1263
def runtests(str1, str2, ignorelist=()):
    """Run the StandardChecker filters on a pair of strings.

    Both strings are normalised, wrapped in a TranslationUnit (str1 as the
    source, str2 as the target) and checked with the filters in ignorelist
    excluded. Each failure is printed, and the failures are returned.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1276 1277
def batchruntests(pairs):
    """Run the tests on a batch of string pairs and print a summary.

    pairs is a sequence of (source, target) string pairs. Every pair is
    passed to runtests, and the number of pairs that passed is printed at
    the end.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed only
        # when that result is empty.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))


if __name__ == '__main__':
    # Ad-hoc (source, target) pairs that are handed to batchruntests when
    # this module is run as a script.
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
              ]
    batchruntests(testset)
