Package translate :: Package convert :: Module xliff2oo
[hide private]
[frames] | no frames]

Source Code for Module translate.convert.xliff2oo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """convert XLIFF localization files to an OpenOffice.org (SDF) localization file""" 
 24   
 25  import os 
 26  import sys 
 27  import time 
 28   
 29  from translate.storage import oo 
 30  from translate.storage import factory 
 31  from translate.filters import pofilter 
 32  from translate.filters import checks 
 33  from translate.filters import autocorrect 
 34   
 35   
class reoo:
    """Merges translations from a translation store into an oo template.

    The template is parsed once at construction time into self.o and indexed
    by key in self.index; convertstore() then applies every accepted unit of
    a source store onto that template and returns the modified oo object.
    """

    def __init__(self, templatefile, languages=None, timestamp=None, includefuzzy=False, long_keys=False, filteraction="exclude"):
        """construct a reoo converter for the specified languages (timestamp=0 means leave unchanged)

        templatefile -- object with a read() method returning the oo source
        languages    -- pair of language ids (source, target), or None to
                        address the template lines by position instead
        timestamp    -- time tuple written onto changed lines; None selects
                        the fixed default 2002-02-02 02:02:02 and any false
                        value (e.g. 0) leaves existing timestamps alone
        includefuzzy -- when false, fuzzy units are not applied
        long_keys    -- forwarded to oo.makekey when building the index
        filteraction -- forwarded to filter.validelement in convertstore
        """
        timeformat = "%Y-%m-%d %H:%M:%S"
        self.long_keys = long_keys
        # long_keys must be set first: readoo() builds the index with it
        self.readoo(templatefile)
        # languages is a pair of language ids
        self.languages = languages
        self.filteraction = filteraction
        if timestamp is None:
            self.timestamp = time.strptime("2002-02-02 02:02:02", timeformat)
        else:
            self.timestamp = timestamp
        if self.timestamp:
            self.timestamp_str = time.strftime(timeformat, self.timestamp)
        else:
            self.timestamp_str = None
        self.includefuzzy = includefuzzy

    def makeindex(self):
        """makes an index of the oo keys that are used in the source file"""
        self.index = {}
        for ookey, theoo in self.o.ookeys.items():
            sourcekey = oo.makekey(ookey, self.long_keys)
            self.index[sourcekey] = theoo

    def readoo(self, of):
        """read in the oo from the file and (re)build the key index"""
        oosrc = of.read()
        self.o = oo.oofile()
        self.o.parse(oosrc)
        self.makeindex()

    def handleunit(self, unit):
        """applies unit to the template entry named by each of its locations

        Each location is split at its last '.' into a key (looked up in
        self.index after normalisation) and a subkey (the attribute that
        receives the translation).  Keys that are not in the index are
        reported on stderr and skipped.
        """
        # TODO: make this work for multiple columns in oo...
        locations = unit.getlocations()
        # technically our formats should just have one location for each entry...
        # but we handle multiple ones just to be safe...
        for location in locations:
            subkeypos = location.rfind('.')
            subkey = location[subkeypos+1:]
            key = location[:subkeypos]
            # this is just to handle our old system of using %s/%s:%s instead of %s/%s#%s
            key = key.replace(':', '#')
            # this is to handle using / instead of \ in the sourcefile...
            key = key.replace('\\', '/')
            key = oo.normalizefilename(key)
            if key in self.index:
                # now we need to replace the definition of entity with msgstr
                theoo = self.index[key]  # find the oo
                self.applytranslation(key, subkey, theoo, unit)
                continue
            sys.stderr.write("couldn't find key %s from po in %d keys\n" % (key, len(self.index)))
            try:
                sourceunitlines = str(unit)
                if isinstance(sourceunitlines, unicode):
                    sourceunitlines = sourceunitlines.encode("utf-8")
                sys.stderr.write(sourceunitlines + "\n")
            except Exception:
                # only ordinary errors are swallowed here; a bare except
                # would also hide KeyboardInterrupt and SystemExit
                sys.stderr.write("error outputting source unit %r\n" % (str(unit),))

    def applytranslation(self, key, subkey, theoo, unit):
        """applies the translation from the source unit to the oo unit

        key    -- index key of theoo, used for error reporting only
        subkey -- name of the attribute on the target line that receives
                  the translation
        theoo  -- template entry holding the source and target lines
        unit   -- translation unit supplying target and isfuzzy()

        Nothing is changed when the unit is fuzzy (and includefuzzy is off),
        when it has no translation, or when the configured source language
        is not present in theoo.
        """
        if not self.includefuzzy and unit.isfuzzy():
            return
        unquotedstr = unit.target
        # If there is no translation, we don't want to add a line
        if unquotedstr is None or len(unquotedstr.strip()) == 0:
            return
        makecopy = False
        if self.languages is None:
            part1 = theoo.lines[0]
            if len(theoo.lines) > 1:
                part2 = theoo.lines[1]
            else:
                makecopy = True
        else:
            if self.languages[0] not in theoo.languages:
                # without the source line there is nothing to copy from
                sys.stderr.write("couldn't find source language %s for key %s\n" % (self.languages[0], key))
                return
            part1 = theoo.languages[self.languages[0]]
            if self.languages[1] in theoo.languages:
                part2 = theoo.languages[self.languages[1]]
            else:
                makecopy = True
        if makecopy:
            # no target line yet: start from a copy of the source line
            part2 = oo.ooline(part1.getparts())
        if isinstance(unquotedstr, unicode):
            unquotedstr = unquotedstr.encode("UTF-8")
        # finally set the new definition in the oo
        subkey = subkey.strip()
        setattr(part2, subkey, unquotedstr)
        # set the modified time
        if self.timestamp_str:
            part2.timestamp = self.timestamp_str
        if self.languages:
            part2.languageid = self.languages[1]
        if makecopy:
            theoo.addline(part2)

    def convertstore(self, sourcestore):
        """applies every accepted unit of sourcestore and returns the oo object"""
        self.p = sourcestore
        # translate the strings
        for unit in self.p.units:
            # there may be more than one element due to msguniq merge
            if filter.validelement(unit, self.p.filename, self.filteraction):
                self.handleunit(unit)
        # return the modified oo file object
        return self.o
144 145
def getmtime(filename):
    """Return the modification time of filename as a local time tuple."""
    from stat import ST_MTIME

    fileinfo = os.stat(filename)
    # index access yields whole seconds, which is what localtime is fed
    return time.localtime(fileinfo[ST_MTIME])
149 150
class oocheckfilter(pofilter.pocheckfilter):
    """Check filter that decides whether a unit may be used in a conversion."""

    def validelement(self, unit, filename, filteraction):
        """Returns whether or not to use unit in conversion. (filename is just for error reporting)

        filteraction "none" accepts every unit without running any check.
        Otherwise the unit is run through self.filterunit (not defined in
        this class; presumably inherited from pofilter.pocheckfilter).  A
        failure listed in self.options.error is reported as an error and
        rejects the unit for "exclude-serious" and "exclude-all"; any other
        reported failure is a warning and rejects it only for "exclude-all".
        Errors are looked for before warnings so that the verdict does not
        depend on the iteration order of the filter results.
        """
        if filteraction == "none":
            return True
        filterresult = self.filterunit(unit)
        # NOTE(review): a result equal to the autocorrect module is accepted
        # as-is -- presumably a sentinel returned by filterunit; confirm.
        if not filterresult or filterresult == autocorrect:
            return True
        failures = list(filterresult.items())
        locations = unit.getlocations()
        if locations:
            location = locations[0]
        else:
            # a unit without locations must not abort the whole conversion
            location = ""
        for filtername, filtermessage in failures:
            if filtername in self.options.error:
                sys.stderr.write("Error at %s::%s: %s\n" % (filename, location, filtermessage))
                return filteraction not in ["exclude-all", "exclude-serious"]
        for filtername, filtermessage in failures:
            if filtername in self.options.warning or self.options.alwayswarn:
                sys.stderr.write("Warning at %s::%s: %s\n" % (filename, location, filtermessage))
                return filteraction not in ["exclude-all"]
        return True
169 170
class oofilteroptions:
    """Settings object handed to oocheckfilter when the module filter is built."""

    # failures of these checks are reported as errors by validelement
    error = ["variables", "xmltags", "escapes"]
    # failures of these checks are reported as warnings by validelement
    warning = ["blank"]
    # To only issue warnings for tests listed in warning, change the following to False:
    alwayswarn = True

    # the checks to run: everything named above
    limitfilters = error + warning
    # To use all available tests, uncomment the following:
    #limitfilters = []

    # To exclude certain tests, list them in here:
    excludefilters = {}

    # NOTE(review): the remaining switches are not read in this module;
    # presumably they are consumed by pofilter -- confirm.
    includefuzzy = False
    includereview = False
    autocorrect = False
# Module-wide filter set-up: one options object and one check filter, the
# latter being what reoo.convertstore consults for every unit.
options = oofilteroptions()
filter = oocheckfilter(
    options,
    [checks.OpenOfficeChecker, checks.StandardUnitChecker],
    checks.openofficeconfig
)
def convertoo(inputfile, outputfile, templatefile, sourcelanguage=None, targetlanguage=None, timestamp=None, includefuzzy=False, multifilestyle="single", filteraction=None):
    """Merge the translations in inputfile into templatefile and write the result.

    inputfile      -- translation file handed to factory.getobject
    outputfile     -- object whose write() receives the converted oo text
    templatefile   -- oo template to translate (required)
    sourcelanguage -- source language id; when empty it defaults to "01" if
                      targetlanguage is all digits and to "en-US" otherwise
    targetlanguage -- target language id (required)
    timestamp, includefuzzy, filteraction -- passed through to reoo
    multifilestyle -- anything other than "single" turns on long keys

    Returns True on completion.  Raises ValueError when targetlanguage or
    templatefile is missing; both are checked before the input is read so a
    bad invocation fails without parsing anything.
    """
    if not targetlanguage:
        raise ValueError("You must specify the target language")
    if templatefile is None:
        raise ValueError("must have template file for oo files")
    if not sourcelanguage:
        if targetlanguage.isdigit():
            sourcelanguage = "01"
        else:
            sourcelanguage = "en-US"
    languages = (sourcelanguage, targetlanguage)
    inputstore = factory.getobject(inputfile)
    # the filename is only used when reporting filter failures
    inputstore.filename = getattr(inputfile, 'name', '')
    convertor = reoo(templatefile, languages=languages, timestamp=timestamp, includefuzzy=includefuzzy, long_keys=multifilestyle != "single", filteraction=filteraction)
    outputstore = convertor.convertstore(inputstore)
    # TODO: check if we need to manually delete missing items
    outputfile.write(str(outputstore))
    return True
208 209
def main(argv=None):
    """Build the command-line option parser for this converter and run it on argv."""
    from translate.convert import convert

    # (input extension, template extension) -> (output extension, converter)
    formats = {
        ("po", "oo"): ("oo", convertoo),
        ("xlf", "oo"): ("oo", convertoo),
        ("xlf", "sdf"): ("sdf", convertoo),
    }
    # always treat the input as an archive unless it is a directory
    archiveformats = {
        (None, "output"): oo.oomultifile,
        (None, "template"): oo.oomultifile,
    }
    parser = convert.ArchiveConvertOptionParser(
        formats,
        usetemplates=True,
        description=__doc__,
        archiveformats=archiveformats)

    parser.add_option(
        "-l", "--language", dest="targetlanguage", default=None,
        help="set target language code (e.g. af-ZA) [required]",
        metavar="LANG")
    parser.add_option(
        "", "--source-language", dest="sourcelanguage", default=None,
        help="set source language code (default en-US)",
        metavar="LANG")
    parser.add_option(
        "-T", "--keeptimestamp", dest="timestamp", default=None,
        action="store_const", const=0,
        help="don't change the timestamps of the strings")
    parser.add_option(
        "", "--nonrecursiveoutput", dest="allowrecursiveoutput", default=True,
        action="store_false",
        help="don't treat the output oo as a recursive store")
    parser.add_option(
        "", "--nonrecursivetemplate", dest="allowrecursivetemplate", default=True,
        action="store_false",
        help="don't treat the template oo as a recursive store")
    parser.add_option(
        "", "--filteraction", dest="filteraction", default="none",
        metavar="ACTION",
        help="action on pofilter failure: none (default), warn, exclude-serious, exclude-all")
    parser.add_fuzzy_option()
    parser.add_multifile_option()

    # option values that are handed on to convertoo as keyword arguments
    # NOTE(review): inferred from the matching convertoo parameter names -- confirm.
    for optionname in ("sourcelanguage", "targetlanguage", "timestamp", "filteraction"):
        parser.passthrough.append(optionname)
    parser.run(argv)
# run the converter when the module is executed as a script
if __name__ == "__main__":
    main()