Site Dump
Note: Very preliminary stuff here
This script collects all available page metadata for a site (or for a single category of it) and writes it as CSV to a file in the current working directory on your hard drive. It is a Python 2 script (it uses print statements and the xmlrpclib module).
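The script reads its connection parameters from a companion module, api_parms.py, so that no credentials appear in the script itself. A minimal sketch of that file, with placeholder values rather than real credentials (the script below prints the same instructions if the import fails):

user_key = 'your-wiki-userid:your-api-key'
    # to receive your API key, post to http://www.wikidot.com/forum/t-137525
site = 'xml-api'
    # the Wikidot site to dump
category = None
    # optional: restrict the dump to one category, e.g. category = 'doc'

Save it to a directory on your Python path; if the import fails, the script prints the current working directory as a suggested location.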
# ToDo:
# - snippet to replace all (None) in return data with ''
# - run double_quotes for all CSV output (e.g. title)
# - use csv.writer from the standard library

from operator import itemgetter
import os
import sys
from time import sleep

def text(string):
    "Return any string as-is, convert None to '(None)'"
    if string:
        return string
    else:
        return '(None)'

def escape_crlf(string):
    "Remove line-ends and replace them with a literal '\\n' (not yet used, see ToDo)"
    single = ''
    for line in string.splitlines():
        single = single + '\\n' + line
    return single[2:]

def double_quotes(string):
    "Double any double-quotes so that CSV import will show single double-quotes (not yet used, see ToDo)"
    return string.replace('"', '""')

# Connection parameters live in a separate module so this script
# contains no credentials and can be shared as-is.
try:
    import api_parms
    print 'Imported api_parms'
except ImportError:
    print """
To run this script, you must first create a script 'api_parms.py':

user_key = 'your-wiki-userid:your-api-key'
    # to receive your API key
    # post to http://www.wikidot.com/forum/t-137525
site = 'your-wikidot-site'
    # e. g. site = 'xml-api'

(optional)
category = 'cat'
    # e. g. category = 'doc' or category = None

and--so that it can be imported from any Python user script--save it to
"""
    print os.getcwd()
    sleep(7)
    sys.exit(1)

from xmlrpclib import ServerProxy

s = ServerProxy('https://' + api_parms.user_key
                + '@www.wikidot.com/xml-rpc-api.php')

# Fall back gracefully if 'site' or 'category' is missing from api_parms.
site = getattr(api_parms, 'site', None) or 'xml-api'
category = getattr(api_parms, 'category', None)

# CSV separators: every field is wrapped in double-quotes.
sep = ','
quote = '"'
sep1 = quote                # opens the first / closes the last field of a row
sep3 = quote + sep + quote  # closes one field, opens the next

if category:
    print 'Listing pages ' + site + '.wikidot.com/' + category + ':*'
    pages = s.site.pages({'site': site, 'category': category})
else:
    print 'Listing pages ' + site + '.wikidot.com/*'
    pages = s.site.pages({'site': site})
print len(pages), 'pages found'

# Sort by name first, then by category: Python's sort is stable,
# so the result is ordered by category, then by name within each category.
pages.sort(key=itemgetter('name'))
pages.sort(key=itemgetter('category'))

csv_name = site
if category:
    csv_name = site + '_' + category
csv = open(csv_name + '.csv', 'w')

# Header row
csv.write(sep1
          + 'site' + sep3
          + 'category' + sep3
          + 'name' + sep3
          + 'full_name' + sep3
          + 'title' + sep3
          + 'title_shown' + sep3
          + 'title_or_unix_name' + sep3
          + 'parent_page' + sep3
          + 'user_created' + sep3
          + 'date_created' + sep3
          + 'user_edited' + sep3
          + 'date_edited' + sep3
          + 'tag_string' + sep1 + '\n')

# One row per page; free-text fields are encoded to cp437 so that
# non-ASCII characters do not break output on a Windows console.
for page in pages:
    csv.write(sep1
              + text(page['site']) + sep3
              + text(page['category']) + sep3
              + text(page['name']) + sep3
              + text(page['full_name']) + sep3
              + text(page['title']).encode('cp437', 'backslashreplace') + sep3
              + text(page['title_shown']).encode('cp437', 'backslashreplace') + sep3
              + text(page['title_or_unix_name']).encode('cp437', 'backslashreplace') + sep3
              + text(page['parent_page']) + sep3
              + text(page['user_created']) + sep3
              + text(page['date_created']) + sep3
              + text(page['user_edited']) + sep3
              + text(page['date_edited']) + sep3
              + text(page['tag_string']).encode('cp437', 'backslashreplace') + sep1 + '\n')
csv.close()

sleep(10)  # give the user time to read the console output before the window closes
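Assuming api_parms.py sets site = 'xml-api' and category = None, the script writes xml-api.csv to the current working directory (with a category set, the file is named e.g. xml-api_doc.csv). The header row below is exactly what the script emits; the data row is invented, purely to show the quoting and the (None) placeholder:

"site","category","name","full_name","title","title_shown","title_or_unix_name","parent_page","user_created","date_created","user_edited","date_edited","tag_string"
"xml-api","doc","start","doc:start","Documentation","Documentation","Documentation","(None)","some-user","2009-06-01 10:00","some-user","2009-06-02 11:30","api csv"

Note that titles containing double-quotes are not yet escaped and would break such a row; that is what the double_quotes helper and the ToDo list at the top of the script are for.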