# Jump to content
#
# Wikipedia:Scripts/ImageFileMigrator/Wiki.py
#
# From Wikipedia, the free encyclopedia
#!/usr/bin/env python
import urllib2
import MultipartPostHandler
import cStringIO
# MultipartPostHandler comes from http://fabien.seisen.org/python/urllib2_multipart.html
from urllib import urlencode
import cookielib
import re
import os
 
class Wiki(object):
    """Small HTTP client that drives a MediaWiki site through its index.php pages.

    All requests go through one urllib2 opener that carries a cookie
    processor (so a login persists across calls) and a multipart handler
    (so dictionaries containing open files can be posted as uploads).
    """

    # Hidden token field on the Special:UserLogin form, when present.
    _LOGIN_TOKEN_RE = re.compile(
        r'<input type="hidden" name="wpLoginToken" value="(\w*)" /></form>')

    # One image cell of the Special:ImageList table: captures the link title
    # and the URL of the "file" link.  The quantifiers are lazy so that
    # several cells on one line of HTML produce one match each instead of a
    # single match that spans from the first cell to the last.
    _IMAGE_ROW_RE = re.compile(
        r'<td class="TablePager_col_img_name"><a href=".*?" title="(.*?)">'
        r'.*?</a> \(<a href="(.*?)">file</a>\)</td>')

    def __init__(self, domain, path = '/index.php'):
        """Prepare a client for the wiki at ``domain``.

        domain -- URL prefix of the site; ``path`` is appended to it verbatim.
        path   -- location of the entry script (an alternative that has been
                  used with this script is '/index.php5').
        """
        self.domain = domain
        self.path = self.domain + path
        self.token = ''
        self.cookie_processor = urllib2.HTTPCookieProcessor()
        self.opener = urllib2.build_opener(self.cookie_processor, MultipartPostHandler.MultipartPostHandler())

    def _fetch(self, url, data=None):
        """Open ``url`` (posting ``data`` when given) and return the body.

        The response is always closed, even if reading it fails.
        """
        response = self.opener.open(url, data)
        try:
            return response.read()
        finally:
            response.close()

    def login(self, user, password):
        """Submit the login form and return the body of the response page."""
        # Call the login page with no data to get the token, if there is one.
        login_html = self._fetch(self.path + "?title=Special:UserLogin")
        tokens = self._LOGIN_TOKEN_RE.findall(login_html)
        if tokens:
            self.token = tokens[0]
        data = {'wpName': user,
                'wpPassword': password,
                'wpLoginattempt': 'Log in'}
        if self.token:
            data['wpLoginToken'] = self.token
        url = self.path + "?title=Special:UserLogin&action=submitlogin&type=login"
        return self._fetch(url, urlencode(data))

    def get_image_list(self):
        """Return a list of ``(title, file_url)`` tuples scraped from Special:ImageList."""
        # The limit is set to 100,000 to try to get all images in one page.
        # Older versions of MediaWiki used
        # self.path + "/Special:Imagelist?limit=100000" instead.
        html = self._fetch(self.path + "?limit=100000&ilsearch=&title=Special:ImageList")
        # findall() with two groups already yields a list of 2-tuples.
        return self._IMAGE_ROW_RE.findall(html)

    def get_page_export(self, pages):
        """Post ``pages`` (an iterable of titles) to Special:Export and return the response body."""
        data = {
            'curonly': 'on',
            # The export form takes one title per line.
            'pages': "\n".join(pages),
            'submit': 'Export'
        }
        # Older versions of MediaWiki used self.path + "/Special:Export".
        url = self.path + "?title=Special:Export"
        return self._fetch(url, urlencode(data))

    def import_pages(self, xml):
        """Upload ``xml`` through Special:Import and return the response body.

        The text is first written to ``import.xml`` in the current working
        directory so that it can be posted as a file; that file is removed
        again whether or not the request succeeds.
        """
        temp_name = "import.xml"
        url = self.path + "?title=Special:Import&action=submit"
        try:
            with open(temp_name, "w") as xml_file:
                xml_file.write(xml)
            with open(temp_name, "r") as xml_file:
                data = {
                    'action': 'submit',
                    'xmlimport': xml_file,
                    'source': 'upload',
                    'submit': 'Upload File'
                }
                return self._fetch(url, data)
        finally:
            # Guarded so a failure to create the file is not masked by a
            # second error from os.remove().
            if os.path.exists(temp_name):
                os.remove(temp_name)

    def upload_image(self, filename):
        """Upload the local file ``filename`` through Special:Upload.

        The destination name is the final path component of ``filename``.
        Returns the body of the response page.
        """
        name = os.path.basename(filename)
        url = self.path + "?title=Special:Upload&action=submit"
        # The file must stay open while the request is sent; the with-block
        # closes it afterwards even if the request raises.
        with open(filename, "rb") as image_file:
            data = {
                'wpUploadFile': image_file,
                'wpDestFile': name,
                'wpUpload': 'Upload File',
                'wpIgnoreWarning': 'off'
            }
            return self._fetch(url, data)

    def logout(self):
        """Forget the session by discarding all cookies and the cached login token."""
        self.cookie_processor.cookiejar.clear()
        # Without this, a later login() whose form carries no token would
        # resend the token remembered from the discarded session.
        self.token = ''