#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Script source: https://gist.github.com/3488254

import urllib
import urllib2
import json
import os
import csv

API_URL = "http://wiki.piratenbrandenburg.de/api.php"
CATEGORIES = ["Kategorie:Sonstiger_Antrag_AF_LPT_2012.1",
              "Kategorie:Satzungsänderungsantrag_AF_LPT_2012.1",
              "Kategorie:Programmantrag_AF_LPT_2012.1"]
MAX_PAGEIDS = 50  # the MediaWiki API accepts at most 50 page ids per query


def get_json(endpoint):
    """Performs an API request and returns the raw JSON response."""
    url = ''.join([API_URL, '?', endpoint, '&format=json'])
    return urllib2.urlopen(url).read()


def get_category(category, query_continue=""):
    """Returns all members of a category, following continuation markers."""
    data = get_json("action=query&list=categorymembers&cmtitle=%s&cmcontinue=%s"
                    % (urllib.quote(category), urllib.quote(query_continue)))
    json_data = json.loads(data)
    pages = json_data["query"]["categorymembers"]
    if "query-continue" in json_data:
        pages += get_category(
            category, json_data["query-continue"]["categorymembers"]["cmcontinue"])
    return pages


def list_applications(categories):
    """Returns the application list, reading the on-disk cache if present."""
    if os.path.isfile("application_list"):
        with open('application_list', 'r') as f:
            return json.load(f)
    return download_applications(categories)


def download_applications(categories):
    applications = _list_applications(categories)
    with open('application_list', 'w+') as f:
        json.dump(applications, f)
    return applications


def _list_applications(categories):
    applications = {}
    for category in categories:
        applications[category] = get_category(category)
    return applications


def get_raw_pageid(pageids):
    """Fetches the latest revision content for a |-separated list of page ids."""
    data = get_json("action=query&prop=revisions&rvprop=content&pageids=%s" % pageids)
    json_data = json.loads(data)
    pages = json_data["query"]["pages"]
    content = []
    for pageid in pages:
        content += pages[pageid]["revisions"]
    return content


def chunks(l, n):
    """Yields successive n-sized chunks from l."""
    for i in xrange(0, len(l), n):
        yield l[i:i + n]


def get_pageid(pageids):
    """Fetches revision contents in batches of MAX_PAGEIDS ids."""
    pages = []
    for chunk in chunks(pageids, MAX_PAGEIDS):
        pages += get_raw_pageid("|".join(str(i) for i in chunk))
    return pages


def _list_content(applications):
    content = {}
    for category in applications.iterkeys():
        pageids = [application["pageid"] for application in applications[category]]
        content[category] = get_pageid(pageids)
    return content


def download_content(applications):
    content = _list_content(applications)
    with open('content', 'w+') as f:
        json.dump(content, f)
    return content


def list_content(applications):
    """Returns the page contents, reading the on-disk cache if present."""
    if os.path.isfile("content"):
        with open('content', 'r') as f:
            return json.load(f)
    return download_content(applications)


def parse_content(content):
    applications = {}
    for category in content.iterkeys():
        applications_for_category = []
        for application_content in content[category]:
            application = mediawiki_template(application_content["*"])
            # skip applications that have not been submitted
            if application["Eingereicht"] != "":
                applications_for_category.append(application)
        applications_for_category.sort(key=lambda a: a["Titel"])
        applications[category] = applications_for_category
    return applications


def mediawiki_template(mw_string):
    """Returns a MediaWiki template element as a hash."""
    # split the content inside the template
    strings = mw_string.split("{{")[1].split("}}")[0].split("\n|")
    # remove the template name ("Antragsfabrikat")
    strings = strings[1:]
    mw_hash = {}
    for string in strings:
        keyval = string.split("=", 1)
        if 2 != len(keyval):
            raise SyntaxError("MediaWiki parsing error: %s" % keyval)
        key, val = [s.strip() for s in keyval]
        mw_hash[key] = val
    return mw_hash


def filter_content(content):
    """Simple filter that converts some HTML tags to plain text."""
    content = content.replace("<sup>1</sup>", "¹")
    content = content.replace("<sup>2</sup>", "²")
    content = content.replace("<sup>3</sup>", "³")
    content = content.replace("<br>", "\n")
    content = content.replace("<br\\>", "\n")
    content = content.replace("<br\\n>", "\n")
    content = content.replace("<br />", "\n")
    content = content.replace("<br/>", "\n")
    return content


def write_content(applications, applications_position=[]):
    open_position = []
    open_position.extend(applications_position)
    for category in applications:
        with open(category, 'w+') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_ALL)
            writer.writerow(("Number", "Title", "Text", "Reason",
                             "Submitter (First Name)", "Submitter (Last Name)"))
            for a in applications[category]:
                try:
                    number = applications_position.index(a["Titel"]) + 1  # numbering starts at 1
                    open_position.remove(a["Titel"])
                except ValueError:
                    print ('"%s" not found in the application book'
                           % a["Titel"]).encode('utf8')
                    number = ""
                writer.writerow((
                    number,
                    a["Titel"].encode('utf8'),
                    filter_content(a["Antragstext"].encode('utf8')),
                    filter_content(a[u'Begr\xfcndung'].encode('utf8')),
                    a["Antragsteller"].encode('utf8'),
                    ""))  # last name is not available separately
    if open_position != []:
        print "Applications from the application book that were not found:"
        for a in open_position:
            print a.encode('utf8')


def get_application_positions(filename):
    """Reads one title per line, in the order given by the agenda (TO)."""
    with open(filename, 'r') as f:
        return [l.strip().decode('utf8') for l in f.readlines()]


if __name__ == '__main__':
    # download_applications(CATEGORIES)   # force a fresh download of the list
    applications = list_applications(CATEGORIES)
    # download_content(applications)      # force a fresh download of the contents
    content = list_content(applications)
    applications = parse_content(content)
    # One title per line, agenda (TO) order given:
    # positions = get_application_positions("reihenfolge-to")
    # write_content(applications, positions)
    write_content(applications)
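
# Usage sketch (an illustration, not part of the original script; the file
# name below is an assumption): the first run downloads the category listing
# and the page contents and caches them in "application_list" and "content"
# in the working directory, so delete those two files to force a refresh.
#
#   $ python export_antraege.py
#
# Output: one CSV file per category (named after the category), with the
# columns Number, Title, Text, Reason, Submitter (First/Last Name).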