# Wikipedia:Scripts/ImageFileMigrator/Wiki.py
#!/usr/bin/env python
import urllib2
import MultipartPostHandler
import cStringIO
# MultipartPostHandler is from http://fabien.seisen.org/python/urllib2_multipart.html
from urllib import urlencode
import cookielib
import re
import os
class Wiki(object):
    """Small HTTP client for a MediaWiki site's index script.

    It logs in through ``Special:UserLogin``, scrapes ``Special:ImageList``,
    exports and imports page XML and uploads files.  Every request goes
    through one shared opener built with a cookie processor and the
    multipart POST handler, so cookies received at login accompany the
    later requests.
    """

    # Hidden login-token field of the login form; not every page has one.
    _LOGIN_TOKEN_RE = re.compile(
        r'<input type="hidden" name="wpLoginToken" value="(\w*)" /></form>')

    # One cell of the image list table.  Group 1 is the title attribute of
    # the name link, group 2 the href of the "file" link.  The quantifiers
    # are non-greedy so that several cells on one physical line are matched
    # one by one instead of being swallowed into a single bogus match.
    _IMAGE_ROW_RE = re.compile(
        r'<td class="TablePager_col_img_name"><a href=".*?" title="(.*?)">'
        r'.*?</a> \(<a href="(.*?)">file</a>\)</td>')

    def __init__(self, domain, path='/index.php'):
        """Prepare a client for the wiki at *domain*.

        domain -- base URL of the site; it is concatenated with *path* and
                  handed to the opener unchanged
        path   -- location of the entry script below *domain* (some
                  installations use '/index.php5' instead of the default)
        """
        self.domain = domain
        self.path = self.domain + path
        self.token = ''
        self.cookie_processor = urllib2.HTTPCookieProcessor()
        self.opener = urllib2.build_opener(
            self.cookie_processor,
            MultipartPostHandler.MultipartPostHandler())

    def login(self, user, password):
        """Submit the login form and return the body of the response.

        The login page is fetched first without data so that a login token,
        if the page carries one, can be sent back with the credentials.
        The response is returned as-is; this method does not check whether
        the login actually succeeded.
        """
        loginpage = self.opener.open(self.path + "?title=Special:UserLogin")
        matches = self._LOGIN_TOKEN_RE.findall(loginpage.read())
        if matches:
            self.token = matches[0]

        data = {'wpName': user,
                'wpPassword': password,
                'wpLoginattempt': 'Log in'}
        if self.token:
            data['wpLoginToken'] = self.token

        url = (self.path
               + "?title=Special:UserLogin&action=submitlogin&type=login")
        response = self.opener.open(url, urlencode(data))
        return response.read()

    def get_image_list(self, limit=100000):
        """Return a list of ``(title, file_url)`` tuples for the wiki's files.

        limit -- maximum number of entries requested from the list page;
                 the large default is an attempt to get every image at once

        The tuples are scraped from the HTML of ``Special:ImageList``; an
        empty list is returned when no table cell matches.
        """
        # Older versions of MediaWiki served this list at
        # <path>/Special:Imagelist?limit=... instead.
        url = self.path + (
            "?limit=%s&ilsearch=&title=Special:ImageList" % limit)
        html = self.opener.open(url).read()
        # findall() with two groups already yields a list of 2-tuples.
        return self._IMAGE_ROW_RE.findall(html)

    def get_page_export(self, pages):
        """Return the ``Special:Export`` response body for *pages*.

        pages -- iterable of page titles; they are sent newline-separated
                 in the 'pages' form field
        """
        # The optional 'templates' and 'wpDownload' form fields are
        # deliberately not sent.
        data = {
            'curonly': 'on',
            'pages': "\n".join(pages),
            'submit': 'Export',
        }
        # Older versions of MediaWiki used <path>/Special:Export instead.
        url = self.path + "?title=Special:Export"
        result = self.opener.open(url, urlencode(data))
        return result.read()

    def import_pages(self, xml):
        """Upload *xml* through ``Special:Import`` and return the response body.

        The XML is written to a scratch file named 'import.xml' in the
        current directory because the multipart form is given an open file
        object.  The scratch file is closed and removed even if the write
        or the request raises.
        """
        scratch = "import.xml"
        try:
            xml_file = open(scratch, "w")
            try:
                xml_file.write(xml)
            finally:
                xml_file.close()

            xml_file = open(scratch, "r")
            try:
                data = {
                    'action': 'submit',
                    'xmlimport': xml_file,
                    'source': 'upload',
                    'submit': 'Upload File',
                }
                url = self.path + "?title=Special:Import&action=submit"
                result = self.opener.open(url, data)
                return result.read()
            finally:
                # Close before removing: an open file cannot be deleted on
                # every platform.
                xml_file.close()
        finally:
            if os.path.exists(scratch):
                os.remove(scratch)

    def upload_image(self, filename):
        """Upload the file at *filename* and return the response body.

        The destination name on the wiki is the last component of
        *filename*.  The local file is closed once the request has
        finished, whether it succeeded or raised.
        """
        # basename() also copes with the platform's own path separator,
        # unlike a plain split on "/".
        name = os.path.basename(filename)
        image_file = open(filename, "rb")
        try:
            data = {
                'wpUploadFile': image_file,
                'wpDestFile': name,
                'wpUpload': 'Upload File',
                # NOTE(review): how the server interprets the literal 'off'
                # may depend on the MediaWiki version -- confirm.
                'wpIgnoreWarning': 'off',
            }
            url = self.path + "?title=Special:Upload&action=submit"
            result = self.opener.open(url, data)
            return result.read()
        finally:
            image_file.close()

    def logout(self):
        """Forget the session: drop all cookies and the cached login token."""
        self.cookie_processor.cookiejar.clear()
        # A token obtained for the old session must not be replayed by a
        # later login() whose login page happens to carry no token.
        self.token = ''