To confirm the power law in Wikipedia edits (many contributors doing a little, a few doing a lot), the following regular expression and Python code parse a Wikipedia history page fairly well:
# Tally edits per author from the HTML of a Wikipedia page-history listing.
#
# Usage: pass the history URL as the first command-line argument.  The script
# prints the number of candidate history lines seen, then one
# "author ; edit-count" line per author, most prolific first.
#
# NOTE(review): the block indentation of this snippet was lost; the nesting
# below (match attempted only on lines that look like history entries) is
# reconstructed from the logic -- confirm against the original script.

# One history entry per line.  Captured groups, in order:
#   1. the revision id following "oldid="
#   2. the timestamp link text: "dd:dd" through a four-digit run
#      (presumably the year)
#   3. the author link text inside the 'history-user' span
#   4. the edit summary inside the 'comment' span, or None when absent
# The lazy ".*?" lives *inside* the optional comment group: a greedy ".*"
# placed in front of the group swallows the span, leaving group 4 always None.
history_regex = (
    r""".*?oldid=(\d+)"""
    r""".*(\d\d:\d\d.*?\d\d\d\d)</a>"""
    r""".*<span class='history-user'>.*?>(.*?)</a>"""
    r"""(?:.*?<span class='comment'>(.*?)</span>)?"""
    r""".*?</li>"""
)
regex_obj = re.compile(history_regex)

url = sys.argv[1]
html = getHTML(url)  # helper defined elsewhere; expected to return the page HTML as text
lines = html.split('\n')

# Accumulators must exist before the loop touches them.
counter = 0   # lines that look like history entries (parsed or not)
edits = {}    # author -> list of (oldid, date, author, comment) tuples

for line in lines:
    if not line.startswith("<li>(<a"):
        continue
    counter += 1
    match_obj = regex_obj.search(line)
    if match_obj:
        oldid, date, author, comment = match_obj.groups()
        edits.setdefault(author, []).append((oldid, date, author, comment))

counts = [(author, len(entries)) for author, entries in edits.items()]
counts_s = sorted(counts, reverse=True, key=operator.itemgetter(1))

# Single-argument print(...) produces the same output on Python 2 and 3.
print(counter)
for author, number in counts_s:
    print("%s ; %s" % (author, number))