#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot goes over multiple pages of the home wiki, searches for selflinks, and
allows removing them.
This script understands various command-line arguments:
-start: used as -start:page_name, specifies that the robot should
go alphabetically through all pages on the home wiki,
starting at the named page.
-file: used as -file:file_name, read a list of pages to treat
from the named textfile. Page titles should be enclosed
in [[double-squared brackets]].
-ref: used as -ref:page_name, specifies that the robot should
work on all pages referring to the named page.
-links: used as -links:page_name, specifies that the robot should
work on all pages referred to from the named page.
-cat: used as -cat:category_name, specifies that the robot should
work on all pages in the named category.
All other parameters will be regarded as a page title; in this case, the bot
will only work on a single page.
"""
__version__='$Id: selflink.py,v 1.5 2006/03/12 20:14:01 wikipedian Exp $'
import wikipedia, pagegenerators, catlib
import re, sys
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
# Edit-summary text for each supported language, keyed by language code.
msg = {
    'de': u'Bot: Entferne Selbstlinks',
    'en': u'Robot: Removing selflinks',
}
class SelflinkBot:
def __init__(self, generator):
self.generator = generator
def run(self):
linktrail = wikipedia.getSite().linktrail()
# The regular expression which finds links. Results consist of four groups:
# group title is the target page title, that is, everything before | or ].
# group section is the page section. It'll include the # to make life easier for us.
# group label is the alternative link title, that's everything between | and ].
# group linktrail is the link trail, that's letters after ]] which are part of the word.
# note that the definition of 'letter' varies from language to language.
linkR = re.compile(r'\[\[(?P