Language statistics for Twitter users with Python
guess-language is a nice Python script for determining the language of a piece of text. Very useful:
import guess_language
import twitter
api = twitter.Api()
from collections import defaultdict
def guess(username, restrict=None):
    """Count the guessed language of each tweet in a user's timeline.

    Args:
        username: Passed to ``api.GetUserTimeline`` to select the timeline.
        restrict: When ``None``, each tweet is classified with
            ``guess_language.guessLanguage``. Otherwise it is forwarded as
            the second argument to ``guess_language.guess_language.check``
            (presumably a set of candidate languages -- TODO confirm
            against the guess_language sources).

    Returns:
        A list of ``(language, count)`` tuples sorted by count, highest
        first.
    """
    # Download the tweets (the request asks for count=200).
    tl = api.GetUserTimeline(username, count=200)
    d = defaultdict(int)
    for tweet in tl:
        text = tweet.GetText()
        if restrict is None:
            lang = guess_language.guessLanguage(text)
        else:
            lang = guess_language.guess_language.check(text, restrict)
        d[lang] += 1
    # items() rather than iteritems(): available on both Python 2 and 3.
    return sorted(d.items(), key=lambda x: x[1], reverse=True)
def action_guess(username=""):
    """Print the language statistics computed by ``guess`` for a screen name.

    Args:
        username: twitter screenname; passed unchanged to ``guess``.

    Prints a header line followed by one ``language count`` line per
    entry returned by ``guess(username)``.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("twitter language staistics for %s:" % username)
    for lang, anz in guess(username):
        # Left-align the language code in an 8-character column.
        print("%-8s %s" % (lang, anz))
Example output for http://twitter.com/chrismarquardt:
twitter language staistics for chrismarquardt:
en 129
de 67
UNKNOWN 4