# creatdata.py (291 B)
  1. import json
  2. twowords=set()
  3. stopwords=set()
  4. for line in open("vietdict.txt", 'r'):
  5. a=json.loads(line)
  6. t_str=a["text"]
  7. tmp=str.split(t_str)
  8. if (len(tmp)==2):
  9. set.add(twowords,t_str)
  10. for line in open("vietnamese-stopwords.txt", 'r'):
  11. set.add(stopwords,line[:-1])