
Benutzer:Hub/antragsfabrik-openslides-export.py

Script for scraping motions (Anträge) from the BB wiki listing into OpenSlides CSV files.
 
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  import urllib
  import urllib2
  import json
  import os
  import csv
 
  API_URL = "http://wiki.piratenbrandenburg.de/api.php"
  CATEGORIES = ["Kategorie:Sonstiger_Antrag_AF_LPT_2012.1",
                "Kategorie:Satzungsänderungsantrag_AF_LPT_2012.1",
                "Kategorie:Programmantrag_AF_LPT_2012.1"]
  MAX_PAGEIDS = 50  # the MediaWiki API accepts at most 50 pageids per request for normal users
 
  def get_json(endpoint):
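    # build the full API URL and return the raw JSON response body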
    url = ''.join([
            API_URL,
            '?',
            endpoint,
            '&format=json',
            ])
    return urllib2.urlopen(url).read()
 
  def get_category(category, query_continue=""):
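    # The API pages its results: follow the query-continue token recursively
    # until all members of the category have been fetched. Parameters are
    # percent-encoded because the category titles contain umlauts.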
    data = get_json("action=query&list=categorymembers&cmtitle=%s&cmcontinue=%s"
                    % (urllib.quote(category), urllib.quote(query_continue)))
    json_data = json.loads(data)
    pages = json_data["query"]["categorymembers"]
    if "query-continue" in json_data:
      pages += get_category(category, json_data["query-continue"]["categorymembers"]["cmcontinue"])
    return pages
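  # For illustration: get_category("Kategorie:Programmantrag_AF_LPT_2012.1")
  # yields member dicts of the form {"pageid": ..., "ns": ..., "title": ...}.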
 
  def list_applications(categories):
    # use the on-disk cache when present, otherwise download
    if os.path.isfile("application_list"):
      with open('application_list', 'r') as f:
        return json.load(f)
    return download_applications(categories)
 
  def download_applications(categories):
    # fetch the category listings and cache them on disk
    applications = _list_applications(categories)
    with open('application_list', 'w+') as f:
      json.dump(applications, f)
    return applications
 
  def _list_applications(categories):
    applications = {}
    for category in categories:
      pages = get_category(category)
      applications[category] = pages
    return applications
 
  def get_raw_pageid(pageid):
    # fetch the latest revision content for the given pageid(s);
    # several ids may be passed joined with "|"
    data = get_json("action=query&prop=revisions&rvprop=content&pageids=%s" % pageid)
    json_data = json.loads(data)
    pages = json_data["query"]["pages"]
    content = []
    for page in pages:
      content += pages[page]["revisions"]
    return content
 
  def chunks(l, n):
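    # yield successive n-sized slices of l,
    # e.g. list(chunks(range(5), 2)) == [[0, 1], [2, 3], [4]]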
    for i in xrange(0, len(l), n):
      yield l[i:i+n]
 
  def get_pageid(pageids):
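    # resolve page contents in chunks of MAX_PAGEIDS ids per API request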
    pages = []
    for chunk in chunks(pageids, MAX_PAGEIDS):
      pages += get_raw_pageid("|".join(str(i) for i in chunk))
    return pages
 
  def _list_content(applications):
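    # collect the pageids per category, then fetch their revision contents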
    pageids = {}
    content = {}
    for category in applications.iterkeys():
      for application in applications[category]:
        if category in pageids:
          pageids[category] += [application["pageid"]]
        else:
          pageids[category] = [application["pageid"]]
      content[category] = get_pageid(pageids[category])
    return content
 
  def download_content(applications):
    # fetch the page contents and cache them on disk
    content = _list_content(applications)
    with open('content', 'w+') as f:
      json.dump(content, f)
    return content
 
  def list_content(applications):
    # use the on-disk cache when present, otherwise download
    if os.path.isfile("content"):
      with open('content', 'r') as f:
        return json.load(f)
    return download_content(applications)
 
  def parse_content(content):
    applications = {}
    for category in content.iterkeys():
      applications_for_category = []
      for application_content in content[category]:
        application = mediawiki_template(application_content["*"])
        # keep only motions that were actually submitted
        if application["Eingereicht"] != "":
          applications_for_category.append(application)
      applications_for_category.sort(key=lambda x: x["Titel"])
      applications[category] = applications_for_category
    return applications
 
  def mediawiki_template(mw_string):
    """Return the fields of the first MediaWiki template on the page as a dict."""
    # take the content between "{{" and "}}" and split it into |key=value lines
    strings = mw_string.split("{{")[1].split("}}")[0].split("\n|")
    # drop the template name ("Antragsfabrikat")
    strings = strings[1:]
    mw_hash = {}
    for string in strings:
      keyval = string.split("=", 1)
      if len(keyval) != 2:
        raise SyntaxError("MediaWiki parsing error: %s" % keyval)
      keyval = [s.strip() for s in keyval]
      key, val = keyval
      mw_hash[key] = val
    return mw_hash
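  # For illustration (assumed page shape): a page body such as
  #   {{Antragsfabrikat
  #   |Titel=Beispielantrag
  #   |Antragstext=...
  #   |Eingereicht=Ja
  #   }}
  # parses to {"Titel": "Beispielantrag", "Antragstext": "...", "Eingereicht": "Ja"}.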
 
  def filter_content(content):
    """Naively convert a few HTML tags to plain text."""
    # superscript digits used for footnote markers
    content = content.replace("<sup>1</sup>", "¹")
    content = content.replace("<sup>2</sup>", "²")
    content = content.replace("<sup>3</sup>", "³")
    # line breaks, including malformed variants found in the wiki text
    content = content.replace("<br>", "\n")
    content = content.replace("<br\\>", "\n")
    content = content.replace("<br\\n>", "\n")
    content = content.replace("<br />", "\n")
    content = content.replace("<br/>", "\n")
    return content
 
 
  def write_content(applications):
    # write one CSV file per category in the column layout of the OpenSlides import
    for category in applications:
      f = open(category, 'w+')
      writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
      writer.writerow(("Number", "Title", "Text", "Reason",
                       "Submitter (First Name)", "Submitter (Last Name)"))
      for a in applications[category]:
        writer.writerow(("",  # number left empty
                         a["Titel"].encode('utf8'),
                         filter_content(a["Antragstext"].encode('utf8')),
                         filter_content(a[u'Begr\xfcndung'].encode('utf8')),
                         a["Antragsteller"].encode('utf8'),
                         ""))  # last name left empty
      f.flush()
      f.close()
 
  if __name__ == '__main__':
    # uncomment the download_* calls to refresh the caches unconditionally
    #download_applications(CATEGORIES)
    applications = list_applications(CATEGORIES)
    #download_content(applications)
    content = list_content(applications)
    applications = parse_content(content)
    write_content(applications)
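
Run without arguments, the script caches the raw API responses in the files application_list and content (delete both to force a fresh download) and writes one CSV file per category, ready for import into OpenSlides.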

Latest revision as of 27 August 2012, 13:05

The script can be found here: https://gist.github.com/3488254