%autosave 5
# Two lists: one of strings, one of integers.
lang = ["C", "C++", "Python", "Java"]
nums = [3, 187, 1232, 53, 21398]
# Single-argument print(...) prints the same text as the Python 2 print
# statement and is also valid Python 3.
print(lang)
print(nums)

# A list may mix element types.
l = ["foobar", 4, 5.0, True]

# Indexing is zero-based.
lang[2]  # "Python"
# Index 4 is one past the end of this 4-element list and raises IndexError.
# The error is caught and reported so the statements below still run.
try:
    lang[4]
except IndexError as err:
    print("IndexError: %s" % err)
# Negative indices count from the end.
lang[-1]  # "Java"
lang[-2]  # "Python"
# Slices: [start:stop] and [start:stop:step]; stop is exclusive.
lang[0:2]  # ["C", "C++"]
lang[0:4:2]  # ["C", "Python"]
len(lang)  # 4

# range(stop), range(start, stop, step), range(start, stop).
range(10)  # 0, 1, ..., 9
range(0, 10, 2)  # 0, 2, 4, 6, 8
range(4, 8)  # 4, 5, 6, 7
range(len(lang))  # 0, 1, 2, 3 -- the valid indices of lang

# Not pythonic
for i in range(len(lang)):
    print(lang[i])
# Pythonic: iterate over the elements directly.  Note that this rebinds the
# name `l` (the mixed-type list above) to each string in turn.
for l in lang:
    print(l)
# Walk through the in-place list operations.  Bare `lang` lines just echo the
# current contents when run interactively.

# append() adds a single element to the end of the list.
lang.append("Scheme")
lang
lang2 = ["Pascal", "FORTRAN"]
# `+` builds a new list; `lang` itself is not modified by this expression.
lang + lang2
# extend() appends every element of lang2 to `lang` in place.
lang.extend(lang2)
lang
# insert() places "Haskell" at index 2, shifting later elements right.
lang.insert(2, "Haskell")
lang
# del removes the element at index 2 (the "Haskell" just inserted).
del lang[2]
lang
# pop() removes and returns the last element ("FORTRAN" at this point).
lang.pop()
lang
# pop(2) removes and returns the element at index 2.
lang.pop(2)
nums
# Largest and smallest element of nums.
max(nums)
min(nums)
# count() returns how many times "Java" occurs in the list.
lang.count("Java")
# reverse() and sort() both reorder the list in place.
lang.reverse()
lang
lang.sort()
lang
# Show the built-in documentation for list objects.
help(lang)
import workshop
# Names of the data files passed to the project-local `workshop` helpers.
TOPTWEETS_FILE = "top_tweets.json"
TWEETS_FILE = "tweets.json"

# NOTE(review): presumably get_tweets_text returns a list of tweet texts and
# the second argument caps how many are read -- confirm against workshop.py.
workshop.get_tweets_text(TOPTWEETS_FILE, 10)
tweets = workshop.get_tweets_text(TOPTWEETS_FILE, 10)
len(tweets)
tweets[0]
# Print the length of each returned item.
for t in tweets:
    print(len(t))
import math
math.sqrt(2)

# Add 1 to every element: explicit loop first ...
l = [100, 200, 300, 400, 500]
l2 = []
for x in l:
    l2.append(x + 1)
l2
# ... then as list comprehensions.
[x for x in l]  # element-by-element copy of l
[x + 1 for x in l]  # same result as the loop above

# Keep only the even numbers: loop form, then a comprehension with a filter.
l3 = [1, 3, 4, 7, 6, 10, 11]
l2 = []
for x in l3:
    if x % 2 == 0:
        l2.append(x)
l2
[x for x in l3 if x % 2 == 0]

# Element-wise sum of two equally long lists.
l1 = [1, 2, 3, 4, 5]
l2 = [100, 200, 300, 400, 500]
l3 = []
# Not pythonic
for i in range(len(l1)):
    l3.append(l1[i] + l2[i])
l3
# zip() pairs up the elements of both lists position by position.
zl = zip(l1, l2)
zl
for x, y in zl:
    # Formats to the same "x y" text the Python 2 `print x, y` statement
    # produces, while remaining valid Python 3.
    print("%s %s" % (x, y))
# Pythonic loop over the pairs ...
l3 = []
for x, y in zip(l1, l2):
    l3.append(x + y)
l3
# ... and the equivalent comprehension.
[x + y for x, y in zip(l1, l2)]
# A list (mutable) and a tuple (immutable) side by side.
a = [3, 5]
b = (10, 42)
# Read-only operations work on tuples just as on lists.
len(b)  # 2
min(b)  # 10
max(b)  # 42
b[0]  # 10
# Item assignment works on the list ...
a[0] = 55
a  # [55, 5]
# ... but a tuple supports neither item assignment nor append().  Both
# failures are caught and reported so the statements that follow still run.
try:
    b[0] = 37
except TypeError as err:
    print("TypeError: %s" % err)
try:
    b.append(10)
except AttributeError as err:
    print("AttributeError: %s" % err)
# String basics: length, indexing, slicing, comparison.
s1 = "foobar"
s2 = "foobar"
len(s1)  # 6
s1[3]  # "b"
s1[1:5]  # "ooba"
# == compares the contents of the two strings.
s1 == s2  # True
# Show the built-in documentation for str.
help(str)
# find() returns the index of the first occurrence of the substring.
s1.find("oo")  # 1
# split() breaks a string on a separator; join() is the inverse, called on
# the separator string.
csv="foo,bar,baz"
values = csv.split(",")
values  # ["foo", "bar", "baz"]
sep = "|"
sep.join(values)  # "foo|bar|baz"
# With no argument split() splits on runs of whitespace; with " " it splits
# on every single space.  For this input both give the same four pieces.
s = "foo bar baz 42"
s.split()
s.split(" ")
# The separator passed to join() may be longer than one character.
";".join(values)  # "foo;bar;baz"
";;;".join(values)  # "foo;;;bar;;;baz"
"foobar".islower()  # True
"foobar".isupper()  # False
# capitalize() upper-cases only the first character of the string.
"hello world".capitalize()  # "Hello world"
# Start from an empty set and add elements one at a time.
s = set()
s
s.add(5)
s.add(7)
s.add(10)
s
# Adding an element that is already present leaves the set unchanged.
s.add(5)
s

# Set algebra on two overlapping sets.
s1 = set([1, 2, 3])
s2 = set([1, 2, 4])
s1 | s2  # union: 1, 2, 3, 4
s1 ^ s2  # symmetric difference: 3, 4
s1 & s2  # intersection: 1, 2
s1 - s2  # difference: 3

# Membership tests use the same `in` syntax for sets and lists.
if 1 in s1:
    print("FOO")
l1 = [2, 3, 4, 5, 6, 7, 1]
if 1 in l1:
    print("FOO")
# Build a dictionary key by key ...
d = {}
d["A"] = 4.0
d["A-"] = 3.7
d["B+"] = 3.3
d
# ... or with a literal.  d2 holds the same key/value pairs as d.
d2 = {'A': 4.0, 'A-': 3.7, 'B+': 3.3}
d3 = {"ten": 10, "twenty": 20, "thirty": 30}

d["A"]  # 4.0
# Looking up a missing key raises KeyError.  The error is caught and reported
# so the statements below still run.
try:
    d3["forty"]
except KeyError as err:
    print("KeyError: %s" % err)
# `key in dict` is the membership test; it replaces dict.has_key(), which
# no longer exists in Python 3.
"forty" in d3  # False
"thirty" in d3  # True

d.keys()
d3.keys()
d3.values()
d3.items()
# Iterate over (key, value) pairs.
for k, v in d3.items():
    # Same "k v" text the Python 2 `print k, v` statement produces.
    print("%s %s" % (k, v))
# NOTE(review): the indexing below assumes workshop.get_tweets returns a
# list of dict-like tweet objects -- confirm against workshop.py.
l = workshop.get_tweets(TOPTWEETS_FILE, 10)
len(l)
l[0]
tweet = l[0]
tweet["text"]
tweet["retweet_count"]
# Nested lookup: the "user" entry is itself indexed by key.
tweet["user"]["screen_name"]
tweet["user"]["name"]

# Histogram of tweet-text lengths: maps length -> number of tweets seen.
lengths = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 1000):
    # Rebinds `l` from the tweet list above to an integer length.
    l = len(tweet["text"])
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if l not in lengths:
        lengths[l] = 1
    else:
        lengths[l] = lengths[l] + 1
for k, v in lengths.items():
    # Same "k v" text the Python 2 `print k, v` statement produces.
    print("%s %s" % (k, v))

l = workshop.get_tweets(TOPTWEETS_FILE, 100)
l[97]["entities"]["hashtags"]
workshop.get_hashtags(l[97])
import operator
# Count how often each hashtag occurs: maps hashtag -> occurrences.
hashtags = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 20000):
    hl = workshop.get_hashtags(tweet)
    for h in hl:
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if h not in hashtags:
            hashtags[h] = 1
        else:
            hashtags[h] = hashtags[h] + 1

# Order the (hashtag, count) pairs by count, highest first.  sorted() builds
# the list directly; calling .sort() on the result of .items() only works
# where .items() returns a list (Python 2).
hashtags_freq = sorted(
    hashtags.items(), key=operator.itemgetter(1), reverse=True
)
# Print the ten most frequent hashtags.
for k, v in hashtags_freq[:10]:
    print("%s %s" % (k, v))

# dict() also accepts a list of (key, value) pairs: here 0..9 -> "0".."9".
dict([(x, str(x)) for x in range(10)])
def multiply(a, b):
    """Return the product ``a * b``.

    No type checking is done; the result is whatever the ``*`` operator
    yields for the two operands.
    """
    return a * b


multiply(5, 4)  # 20
def incr(l):
    """Return a new list with every element of ``l`` increased by 1."""
    return [x + 1 for x in l]


l = [1, 2, 3, 4, 5]
incr(l)  # [2, 3, 4, 5, 6]; `l` itself is not modified


# Redefinition with a default argument.  This replaces the version above;
# calls that omit `by` behave exactly as before.
def incr(l, by=1):
    """Return a new list with every element of ``l`` increased by ``by``.

    ``by`` defaults to 1.  The input list is left unchanged.
    """
    return [x + by for x in l]


incr(l, by=5)  # [6, 7, 8, 9, 10]
def minusplus(x):
    """Return the pair ``(x - 1, x + 1)`` as a tuple."""
    return x - 1, x + 1


minusplus(5)  # (4, 6)
# The returned tuple can be unpacked into separate names.
x, y = minusplus(5)
x  # 4
y  # 6
import functions
# NOTE(review): `functions` is a project-local module not shown here.  Given
# the name and arguments this presumably returns text lengths for up to 100
# tweets read from TOPTWEETS_FILE -- confirm against functions.py.
functions.extract_lengths(TOPTWEETS_FILE, 100)
# IPython Notebook-specific: the %matplotlib magic below is not plain Python.
%matplotlib inline
import matplotlib.pylab as plt
# Plot a single series; only y values are supplied.
ys = [10,5,7,20,-5,7]
plt.plot(ys)
# Plot the same y values against explicit x positions.
xs=[1,2,7,8,12,50]
plt.plot(xs,ys)
# Label both axes, then draw the plot.
plt.xlabel("Some x values")
plt.ylabel("Some y values")
plt.plot(xs,ys)
import random
# 100 points whose coordinates are each drawn uniformly from -10..10,
# plotted as red "+" markers.
random_xs = [random.uniform(-10,10) for x in range(100)]
random_ys = [random.uniform(-10,10) for x in range(100)]
plt.plot(random_xs, random_ys, "+", color="red")
# Histogram (100 bins) of 10000 samples from a normal distribution with
# mean 10 and standard deviation 3.
norm_values = [random.normalvariate(10,3) for x in range(10000)]
h = plt.hist(norm_values, bins=100)
import math
# Sample sin and cos at 400 x positions spaced 0.1 apart, starting at -20 and
# stopping just short of 20.  This rebinds `xs` from the list used above.
xs = [x*0.1 for x in range(-200,200)]
sin_ys = [math.sin(x) for x in xs]
cos_ys = [math.cos(x) for x in xs]
# The label= text is what plt.legend() displays for each curve.
plt.plot(xs, sin_ys, color="red", label="sin(x)")
plt.plot(xs, cos_ys, color="blue", label="cos(x)")
# Fix the y-axis range to -2..2.
plt.ylim(-2, 2)
plt.legend()
from exercise_functions import extract_values
# NOTE(review): extract_values is project-local and not shown here;
# presumably it returns the "retweet_count" value of up to 20000 tweets read
# from TOPTWEETS_FILE -- confirm against exercise_functions.py.
top_retweets = extract_values(TOPTWEETS_FILE, 20000, "retweet_count")
# Histogram with 50 bins; log=True requests a logarithmic count axis.
h = plt.hist(top_retweets, bins=50, log=True)