requêtes sur le wiktionnaire, version simple
This commit is contained in:
parent
36dda2bf81
commit
a596ea3661
1 changed files with 44 additions and 0 deletions
44
src/wiktionnaire.py
Normal file
44
src/wiktionnaire.py
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
#encoding: utf-8
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from urllib.request import urlopen
|
||||||
|
import re
|
||||||
|
|
||||||
|
def download(url):
    """Stream the resource located at *url*, one line at a time.

    The URL is opened with ``urlopen`` and every line is yielded exactly as
    ``readline()`` returned it (no decoding, line terminator kept). The
    iteration stops at the first empty read, and the ``with`` block closes
    the response once the generator is done.
    """
    with urlopen(url) as response:
        # An empty bytes object from readline() marks the end of the stream,
        # so it serves as the sentinel for the two-argument form of iter().
        yield from iter(response.readline, b"")
|
||||||
|
|
||||||
|
def grep(pattern, page):
    """Lazily keep only the lines of *page* that match *pattern*.

    Matching is done with ``re.match``, so the regular expression is tested
    at the beginning of each line. Matching lines are yielded unchanged and
    in their original order.
    """
    yield from filter(lambda candidate: re.match(pattern, candidate), page)
|
||||||
|
|
||||||
|
def decode(strings):
    """Lazily decode every item of *strings* from UTF-8.

    Each item must provide a ``decode`` method (e.g. ``bytes``); the decoded
    values are yielded in the same order as the input.
    """
    yield from (raw.decode("utf-8") for raw in strings)
|
||||||
|
|
||||||
|
def format(strings):
    """Strip wiki markup from each string of *strings*, lazily.

    For every input string the following is applied, in this order:

    * bold (``'''``) and italic (``''``) quote markers are removed;
    * piped links ``[[target|label]]`` are reduced to ``target``;
    * remaining ``[[`` and ``]]`` link brackets are removed;
    * section templates of the form ``{{-name-}}`` are removed;
    * newline characters are removed.

    The cleaned strings are yielded in input order.

    NOTE: the name shadows the ``format`` builtin; it is kept because other
    code refers to this function by that name.
    """
    for s in strings:
        res = s
        # Bold markers must go first: removing '' first would leave a stray
        # apostrophe behind every ''' sequence.
        res = res.replace("'''", "")
        res = res.replace("''", "")
        # Target and label may contain spaces, apostrophes, hyphens, '#', ...
        # so accept anything up to the delimiters instead of word characters
        # only; otherwise "target|label" leaks into the output once the
        # brackets are stripped below.
        res = re.sub(r'\[\[([^\]|]*)\|[^\]]*\]\]', r'\1', res)
        res = res.replace("[[", "")
        res = res.replace("]]", "")
        res = re.sub(r'{{-\w*-}}', r'', res)
        res = res.replace("\n", "")
        yield res
|
||||||
|
|
||||||
|
# Script entry point: query the French Wiktionary API for the term given on
# the command line and print every line of the response that, once the wiki
# markup has been stripped, starts with ":" followed by whitespace.
from urllib.parse import quote

if len(sys.argv) < 2:
    # Fail with a readable message rather than a bare IndexError.
    sys.exit("usage: %s TERM" % sys.argv[0])

term = sys.argv[1]
url = "http://fr.wiktionary.org/w/api.php?format=xml&action=query&titles=%s&rvprop=content&prop=revisions&redirects=1"
# Raw string: "\s" is a regex escape, not a valid Python string escape.
match = r"^:\s"

# The term must be percent-encoded before it is placed in the query string:
# accented letters, spaces or "&" would otherwise produce an invalid URL.
for d in grep(match, format(decode(download(url % quote(term))))):
    print(term, d)
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue