# -*- coding: iso8859-1 -*-
"""
Script to copy a table from one Wikipedia to another one, translating it
on-the-fly.

Syntax:
  copy_table.py -type:abcd -from:xy Article_Name

Command line options:

-from:xy       Copy the table from the Wikipedia article in language xy
               Article must have interwiki link to xy

-debug         Show debug info, and don't send the results to the server

-type:abcd     Translates the table, using translations given below.
               When the -type argument is not used, the bot will simply
               copy the table as-is.

-file:XYZ      Reads article names from a file. XYZ is the name of the
               file from which the list is taken. If XYZ is not given, the
               user is asked for a filename.
               Page titles should be saved one per line, without
               [[brackets]].
               The -pos parameter won't work if -file is used.

-image         Copy all images within the found table to the target
               Wikipedia. Make sure the bot is logged in before trying to
               upload images.

Article_Name:  Name of the article where a table should be inserted

"""
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.
#
__version__='$Id: copy_table.py,v 1.1 2006/01/16 20:49:19 wikipedian Exp $'
#
import wikipedia, translator, lib_images
import re, sys, string

# Summary message, keyed by language code. The title of the source page is
# appended to it when the edit summary is set in treat().
msg={
    "en":"robot: copying table from ",
    "de":"Bot: Kopiere Tabelle von ",
    "pt":"Bot: Copiando tabela de ",
    }


def print_debug(text):
    """Print text on the screen only if in -debug mode.

    Argument text should be raw unicode. The module-level ``debug`` flag
    that is tested here is not assigned in this part of the file.
    """
    if debug:
        wikipedia.output(text)


def imagelinks(site, text):
    """Return the targets of all image links found in text.

    This is a modified version of wikipedia.imagelinks(); it only looks in
    the given text, not in the whole page.

    site -- site object whose image_namespace() gives the local name of the
            image namespace
    text -- wiki text to scan

    Returns a list with one entry per [[Image:...]] link, each entry being
    the part before any "|" (namespace prefix included), in order of
    appearance.
    """
    image_ns = site.image_namespace()
    # regular expression which matches e.g. "Image" as well as "image"
    # (for en:)
    im = '[' + image_ns[0].upper() + image_ns[0].lower() + ']' + image_ns[1:]
    w1 = r'(' + im + r':[^\]\|]*)'
    w2 = r'([^\]]*)'
    Rlink = re.compile(r'\[\[' + w1 + r'(\|' + w2 + r')?\]\]')
    # findall() yields one tuple per link; group 0 is the link target.
    return [groups[0] for groups in Rlink.findall(text)]


def _image_tag_pattern(title):
    """Build a regular expression source matching an image title in wiki text.

    The pattern accepts either case for the first letter and either a
    space or an underscore wherever the title has a space, so that
    "Image:My pic.jpg" also matches "image:My_pic.jpg". Every other
    character is escaped and therefore matched literally.
    """
    head, tail = title[:1], title[1:]
    # Escape each space-separated piece on its own: the result of
    # re.escape() for a space differs between Python versions, so the
    # space itself is never passed through it.
    pieces = [re.escape(piece) for piece in tail.split(" ")]
    #todo: regex for first letter of filename, i.e. first letter after the
    # colon
    return ("[" + re.escape(head.upper()) + re.escape(head.lower()) + "]"
            + "[ _]".join(pieces))


def treat(to_pl, fromsite):
    """Copy the first table of the interwiki-linked page into to_pl.

    Opens the page, checks for an interwiki link to fromsite, transfers and
    translates the first table, and (if image copying is enabled) copies
    all images in that table.

    to_pl    -- page that receives the table; the table is inserted at the
                top of its text
    fromsite -- site to copy from; to_pl must have an interwiki link to it

    Returns None in every case. When something is missing (page, interwiki
    link, table, translation) a message is printed and nothing is saved.
    In -debug mode nothing is saved either.

    Relies on the module-level names debug, mysite, copy_images, type and
    get_table, none of which are defined in this part of the file.
    """
    try:
        to_text = to_pl.get()
        interwikis = to_pl.interwiki()
    except wikipedia.IsRedirectPage:
        print("Can't work on redirect page.")
        return
    except wikipedia.NoPage:
        print("Page not found.")
        return

    from_pl = None
    for interwiki in interwikis:
        if interwiki.site() == fromsite:
            from_pl = interwiki
    if from_pl is None:
        print("Interwiki link to %s not found." % repr(fromsite))
        return

    # NOTE(review): unlike to_pl.get() above, this call is not guarded
    # against IsRedirectPage / NoPage -- confirm whether that is intended.
    from_text = from_pl.get()
    wikipedia.setAction(wikipedia.translate(mysite.lang, msg) + from_pl.aslink())
    # search start of table
    table = get_table(from_text)
    if not table:
        wikipedia.output(u"No table found in %s" % (from_pl.aslink()))
        return

    if copy_images:
        print_debug(u"Copying images")
        # extract image links from original table
        for image in imagelinks(fromsite, table):
            # Copy the image to the current wikipedia, copy the image
            # description page as well.
            # Prompt the user so that he can translate the filename.
            new_filename = lib_images.transfer_image(wikipedia.Page(fromsite, image), debug)
            # a false value means the upload did not succeed
            if not new_filename:
                continue
            old_image_tag = wikipedia.Page(fromsite, image).title()
            new_image_tag = wikipedia.Page(mysite, mysite.image_namespace() + ":" + new_filename).title()
            print_debug(u"Replacing " + old_image_tag + " with " + new_image_tag)
            # We want to replace "Image:My pic.jpg" as well as
            # "image:my_pic.jpg", so we need a regular expression.
            # A function is used as the replacement so that backslashes in
            # the new title are inserted literally instead of being read
            # as escapes or group references.
            table = re.sub(_image_tag_pattern(old_image_tag),
                           lambda match: new_image_tag,
                           table)

    # NOTE(review): ``type`` is presumably the module-level -type option
    # set elsewhere in this script (shadowing the builtin) -- confirm.
    translated_table = translator.translate(table, type, fromsite.lang, debug, mysite.lang)
    if not translated_table:
        print("Could not translate table.")
        return
    print_debug(u"\n" + translated_table)
    # add table to top of the article, separated by a blank line
    to_text = translated_table + "\n\n" + to_text
    if not debug:
        # save changes on Wikipedia
        # NOTE(review): '0' is a non-empty string and therefore true in a
        # boolean context; confirm how put() interprets minorEdit.
        to_pl.put(to_text, minorEdit='0')

# Regular expression that will match both