Introduction to Python

In [1]:
%autosave 5
Autosaving every 5 seconds

Lists

In [2]:
lang = ["C", "C++", "Python", "Java"]
In [3]:
nums = [3,187,1232,53,21398]
In [4]:
print lang
['C', 'C++', 'Python', 'Java']

In [5]:
print nums
[3, 187, 1232, 53, 21398]

In [6]:
l = ["foobar", 4, 5.0, True]
In [7]:
lang[2]
Out[7]:
'Python'
In [9]:
lang[4]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-9-e83aaddb2283> in <module>()
----> 1 lang[4]

IndexError: list index out of range
In [10]:
lang[-1]
Out[10]:
'Java'
In [11]:
lang[-2]
Out[11]:
'Python'
In [12]:
lang[0:2]
Out[12]:
['C', 'C++']
In [13]:
lang[0:4:2]
Out[13]:
['C', 'Python']
In [14]:
len(lang)
Out[14]:
4
In [15]:
range(10)
Out[15]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
In [16]:
range(0,10,2)
Out[16]:
[0, 2, 4, 6, 8]
In [17]:
range(4,8)
Out[17]:
[4, 5, 6, 7]
In [18]:
range(len(lang))
Out[18]:
[0, 1, 2, 3]
In [19]:
# Not pythonic: walks the list by position, building range(len(lang)) only
# to index back into lang; the next cell iterates over the elements directly.
for i in range(len(lang)):
    print lang[i]
C
C++
Python
Java

In [20]:
for l in lang:
    print l
C
C++
Python
Java

In [21]:
lang.append("Scheme")
In [22]:
lang
Out[22]:
['C', 'C++', 'Python', 'Java', 'Scheme']
In [23]:
lang2 = ["Pascal", "FORTRAN"]
In [24]:
lang + lang2
Out[24]:
['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']
In [25]:
lang.extend(lang2)
In [26]:
lang
Out[26]:
['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']
In [27]:
lang.insert(2, "Haskell")
In [28]:
lang
Out[28]:
['C', 'C++', 'Haskell', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']
In [29]:
del lang[2]
In [30]:
lang
Out[30]:
['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']
In [31]:
lang.pop()
Out[31]:
'FORTRAN'
In [32]:
lang
Out[32]:
['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal']
In [33]:
lang.pop(2)
Out[33]:
'Python'
In [34]:
nums
Out[34]:
[3, 187, 1232, 53, 21398]
In [35]:
max(nums)
Out[35]:
21398
In [36]:
min(nums)
Out[36]:
3
In [37]:
lang.count("Java")
Out[37]:
1
In [38]:
lang.reverse()
In [39]:
lang
Out[39]:
['Pascal', 'Scheme', 'Java', 'C++', 'C']
In [40]:
lang.sort()
In [42]:
lang
Out[42]:
['C', 'C++', 'Java', 'Pascal', 'Scheme']
In [45]:
help(lang)
Help on list object:

class list(object)
 |  list() -> new empty list
 |  list(iterable) -> new list initialized from iterable's items
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __delitem__(...)
 |      x.__delitem__(y) <==> del x[y]
 |  
 |  __delslice__(...)
 |      x.__delslice__(i, j) <==> del x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __iadd__(...)
 |      x.__iadd__(y) <==> x+=y
 |  
 |  __imul__(...)
 |      x.__imul__(y) <==> x*=y
 |  
 |  __init__(...)
 |      x.__init__(...) initializes x; see help(type(x)) for signature
 |  
 |  __iter__(...)
 |      x.__iter__() <==> iter(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __reversed__(...)
 |      L.__reversed__() -- return a reverse iterator over the list
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __setitem__(...)
 |      x.__setitem__(i, y) <==> x[i]=y
 |  
 |  __setslice__(...)
 |      x.__setslice__(i, j, y) <==> x[i:j]=y
 |      
 |      Use  of negative indices is not supported.
 |  
 |  __sizeof__(...)
 |      L.__sizeof__() -- size of L in memory, in bytes
 |  
 |  append(...)
 |      L.append(object) -- append object to end
 |  
 |  count(...)
 |      L.count(value) -> integer -- return number of occurrences of value
 |  
 |  extend(...)
 |      L.extend(iterable) -- extend list by appending elements from the iterable
 |  
 |  index(...)
 |      L.index(value, [start, [stop]]) -> integer -- return first index of value.
 |      Raises ValueError if the value is not present.
 |  
 |  insert(...)
 |      L.insert(index, object) -- insert object before index
 |  
 |  pop(...)
 |      L.pop([index]) -> item -- remove and return item at index (default last).
 |      Raises IndexError if list is empty or index is out of range.
 |  
 |  remove(...)
 |      L.remove(value) -- remove first occurrence of value.
 |      Raises ValueError if the value is not present.
 |  
 |  reverse(...)
 |      L.reverse() -- reverse *IN PLACE*
 |  
 |  sort(...)
 |      L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;
 |      cmp(x, y) -> -1, 0, 1
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __hash__ = None
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


Twitter Example

In [46]:
import workshop
In [47]:
TOPTWEETS_FILE = "top_tweets.json"
TWEETS_FILE = "tweets.json"
In [48]:
workshop.get_tweets_text(TOPTWEETS_FILE, 10)
Out[48]:
[u'rt if u used to wear love spell',
 u'TWO SUMMERS IN ONE YEAR?! THANKS AUSTRALIA!\U0001f334\u2600\ufe0f\U0001f357\U0001f385\u2600\ufe0f\U0001f334',
 u'\U0001f64f heaven is a place on earth \U0001f64f http://t.co/BcKOCys75p',
 u"The more fuzzy stuff you wear the more he'll think you're an actual teddy bear.",
 u'hi http://t.co/ihlaO8XyUF',
 u'RT @WORLDSTARVlNE: OMG @katyperry HALLOWEEN COSTUME http://t.co/lN7y2BMGGJ',
 u'RT @WorIdStarComedy: Katy Perry the realest http://t.co/l9zTSk6egI',
 u'\U0001f383whore-o-ween\U0001f383',
 u'Lay me down on ur flames @ChesterCheetah: @katyperry If u think u can make me fall in \u2764\ufe0f by dressing as a Flamin Hot Cheeto, ur 100% correct',
 u'*WIPES CHEETO DUST OFF SHOULDERS* \u201c@FLApfkp: u da real mvp @katyperry http://t.co/9PGvGxEyvR\u201d']
In [49]:
tweets = workshop.get_tweets_text(TOPTWEETS_FILE, 10)
In [50]:
len(tweets)
Out[50]:
10
In [51]:
tweets[0]
Out[51]:
u'rt if u used to wear love spell'
In [52]:
for t in tweets:
    print len(t)
31
51
53
79
25
74
66
14
140
93

In [53]:
import math
In [54]:
math.sqrt(2)
Out[54]:
1.4142135623730951

List Comprehensions

In [55]:
l = [100,200,300,400,500]
In [56]:
l2 = []
In [57]:
for x in l:
    l2.append(x+1)
In [58]:
l2
Out[58]:
[101, 201, 301, 401, 501]
In [59]:
[x for x in l]
Out[59]:
[100, 200, 300, 400, 500]
In [60]:
[x+1 for x in l]
Out[60]:
[101, 201, 301, 401, 501]
In [61]:
l3 = [1,3,4,7,6,10,11]
In [62]:
l2 = []
for x in l3:
    if x%2 == 0:
        l2.append(x)
In [63]:
l2
Out[63]:
[4, 6, 10]
In [65]:
[x for x in l3 if x%2==0]
Out[65]:
[4, 6, 10]
In [66]:
l1 = [1,2,3,4,5]
l2 = [100,200,300,400,500]
In [67]:
# Element-wise sum of l1 and l2, collected into l3.
l3 = []
# Not pythonic: pairs the elements up through a shared index i, so it
# relies on l2 having at least len(l1) items (l2[i] raises IndexError
# otherwise).
for i in range(len(l1)):
    l3.append(l1[i] + l2[i])
In [68]:
l3
Out[68]:
[101, 202, 303, 404, 505]
In [70]:
zl = zip(l1,l2)
In [71]:
zl
Out[71]:
[(1, 100), (2, 200), (3, 300), (4, 400), (5, 500)]
In [75]:
for x, y in zl:
    print x, y
1 100
2 200
3 300
4 400
5 500

In [76]:
l3 = []
for x, y in zip(l1,l2):
    l3.append(x+y)
In [77]:
l3
Out[77]:
[101, 202, 303, 404, 505]
In [78]:
[x+y for x,y in zip(l1,l2)]
Out[78]:
[101, 202, 303, 404, 505]

Tuples

In [79]:
a = [3,5]
In [80]:
b = (10,42)
In [81]:
len(b)
Out[81]:
2
In [82]:
min(b)
Out[82]:
10
In [83]:
max(b)
Out[83]:
42
In [84]:
b[0]
Out[84]:
10
In [85]:
a[0] = 55
In [86]:
a
Out[86]:
[55, 5]
In [87]:
b[0] = 37
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-87-d829f3437425> in <module>()
----> 1 b[0] = 37

TypeError: 'tuple' object does not support item assignment
In [88]:
b.append(10)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-88-37a3cd709306> in <module>()
----> 1 b.append(10)

AttributeError: 'tuple' object has no attribute 'append'

Strings

In [89]:
s1 = "foobar"
In [90]:
s2 = "foobar"
In [91]:
len(s1)
Out[91]:
6
In [92]:
s1[3]
Out[92]:
'b'
In [93]:
s1[1:5]
Out[93]:
'ooba'
In [94]:
s1 == s2
Out[94]:
True
In [95]:
help(str)
Help on class str in module __builtin__:

class str(basestring)
 |  str(object='') -> string
 |  
 |  Return a nice string representation of the object.
 |  If the argument is a string, the return value is the same object.
 |  
 |  Method resolution order:
 |      str
 |      basestring
 |      object
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __format__(...)
 |      S.__format__(format_spec) -> string
 |      
 |      Return a formatted version of S as described by format_spec.
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getnewargs__(...)
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __hash__(...)
 |      x.__hash__() <==> hash(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mod__(...)
 |      x.__mod__(y) <==> x%y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __rmod__(...)
 |      x.__rmod__(y) <==> y%x
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __sizeof__(...)
 |      S.__sizeof__() -> size of S in memory, in bytes
 |  
 |  __str__(...)
 |      x.__str__() <==> str(x)
 |  
 |  capitalize(...)
 |      S.capitalize() -> string
 |      
 |      Return a copy of the string S with only its first character
 |      capitalized.
 |  
 |  center(...)
 |      S.center(width[, fillchar]) -> string
 |      
 |      Return S centered in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  count(...)
 |      S.count(sub[, start[, end]]) -> int
 |      
 |      Return the number of non-overlapping occurrences of substring sub in
 |      string S[start:end].  Optional arguments start and end are interpreted
 |      as in slice notation.
 |  
 |  decode(...)
 |      S.decode([encoding[,errors]]) -> object
 |      
 |      Decodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
 |      as well as any other name registered with codecs.register_error that is
 |      able to handle UnicodeDecodeErrors.
 |  
 |  encode(...)
 |      S.encode([encoding[,errors]]) -> object
 |      
 |      Encodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
 |      'xmlcharrefreplace' as well as any other name registered with
 |      codecs.register_error that is able to handle UnicodeEncodeErrors.
 |  
 |  endswith(...)
 |      S.endswith(suffix[, start[, end]]) -> bool
 |      
 |      Return True if S ends with the specified suffix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      suffix can also be a tuple of strings to try.
 |  
 |  expandtabs(...)
 |      S.expandtabs([tabsize]) -> string
 |      
 |      Return a copy of S where all tab characters are expanded using spaces.
 |      If tabsize is not given, a tab size of 8 characters is assumed.
 |  
 |  find(...)
 |      S.find(sub [,start [,end]]) -> int
 |      
 |      Return the lowest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  format(...)
 |      S.format(*args, **kwargs) -> string
 |      
 |      Return a formatted version of S, using substitutions from args and kwargs.
 |      The substitutions are identified by braces ('{' and '}').
 |  
 |  index(...)
 |      S.index(sub [,start [,end]]) -> int
 |      
 |      Like S.find() but raise ValueError when the substring is not found.
 |  
 |  isalnum(...)
 |      S.isalnum() -> bool
 |      
 |      Return True if all characters in S are alphanumeric
 |      and there is at least one character in S, False otherwise.
 |  
 |  isalpha(...)
 |      S.isalpha() -> bool
 |      
 |      Return True if all characters in S are alphabetic
 |      and there is at least one character in S, False otherwise.
 |  
 |  isdigit(...)
 |      S.isdigit() -> bool
 |      
 |      Return True if all characters in S are digits
 |      and there is at least one character in S, False otherwise.
 |  
 |  islower(...)
 |      S.islower() -> bool
 |      
 |      Return True if all cased characters in S are lowercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  isspace(...)
 |      S.isspace() -> bool
 |      
 |      Return True if all characters in S are whitespace
 |      and there is at least one character in S, False otherwise.
 |  
 |  istitle(...)
 |      S.istitle() -> bool
 |      
 |      Return True if S is a titlecased string and there is at least one
 |      character in S, i.e. uppercase characters may only follow uncased
 |      characters and lowercase characters only cased ones. Return False
 |      otherwise.
 |  
 |  isupper(...)
 |      S.isupper() -> bool
 |      
 |      Return True if all cased characters in S are uppercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  join(...)
 |      S.join(iterable) -> string
 |      
 |      Return a string which is the concatenation of the strings in the
 |      iterable.  The separator between elements is S.
 |  
 |  ljust(...)
 |      S.ljust(width[, fillchar]) -> string
 |      
 |      Return S left-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space).
 |  
 |  lower(...)
 |      S.lower() -> string
 |      
 |      Return a copy of the string S converted to lowercase.
 |  
 |  lstrip(...)
 |      S.lstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  partition(...)
 |      S.partition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, and return the part before it,
 |      the separator itself, and the part after it.  If the separator is not
 |      found, return S and two empty strings.
 |  
 |  replace(...)
 |      S.replace(old, new[, count]) -> string
 |      
 |      Return a copy of string S with all occurrences of substring
 |      old replaced by new.  If the optional argument count is
 |      given, only the first count occurrences are replaced.
 |  
 |  rfind(...)
 |      S.rfind(sub [,start [,end]]) -> int
 |      
 |      Return the highest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  rindex(...)
 |      S.rindex(sub [,start [,end]]) -> int
 |      
 |      Like S.rfind() but raise ValueError when the substring is not found.
 |  
 |  rjust(...)
 |      S.rjust(width[, fillchar]) -> string
 |      
 |      Return S right-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  rpartition(...)
 |      S.rpartition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, starting at the end of S, and return
 |      the part before it, the separator itself, and the part after it.  If the
 |      separator is not found, return two empty strings and S.
 |  
 |  rsplit(...)
 |      S.rsplit([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string, starting at the end of the string and working
 |      to the front.  If maxsplit is given, at most maxsplit splits are
 |      done. If sep is not specified or is None, any whitespace string
 |      is a separator.
 |  
 |  rstrip(...)
 |      S.rstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with trailing whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  split(...)
 |      S.split([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string.  If maxsplit is given, at most maxsplit
 |      splits are done. If sep is not specified or is None, any
 |      whitespace string is a separator and empty strings are removed
 |      from the result.
 |  
 |  splitlines(...)
 |      S.splitlines(keepends=False) -> list of strings
 |      
 |      Return a list of the lines in S, breaking at line boundaries.
 |      Line breaks are not included in the resulting list unless keepends
 |      is given and true.
 |  
 |  startswith(...)
 |      S.startswith(prefix[, start[, end]]) -> bool
 |      
 |      Return True if S starts with the specified prefix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      prefix can also be a tuple of strings to try.
 |  
 |  strip(...)
 |      S.strip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading and trailing
 |      whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  swapcase(...)
 |      S.swapcase() -> string
 |      
 |      Return a copy of the string S with uppercase characters
 |      converted to lowercase and vice versa.
 |  
 |  title(...)
 |      S.title() -> string
 |      
 |      Return a titlecased version of S, i.e. words start with uppercase
 |      characters, all remaining cased characters have lowercase.
 |  
 |  translate(...)
 |      S.translate(table [,deletechars]) -> string
 |      
 |      Return a copy of the string S, where all characters occurring
 |      in the optional argument deletechars are removed, and the
 |      remaining characters have been mapped through the given
 |      translation table, which must be a string of length 256 or None.
 |      If the table argument is None, no translation is applied and
 |      the operation simply removes the characters in deletechars.
 |  
 |  upper(...)
 |      S.upper() -> string
 |      
 |      Return a copy of the string S converted to uppercase.
 |  
 |  zfill(...)
 |      S.zfill(width) -> string
 |      
 |      Pad a numeric string S with zeros on the left, to fill a field
 |      of the specified width.  The string S is never truncated.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [96]:
s1.find("oo")
Out[96]:
1
In [97]:
csv="foo,bar,baz"
In [101]:
values = csv.split(",")
In [102]:
values
Out[102]:
['foo', 'bar', 'baz']
In [99]:
sep = "|"
In [103]:
sep.join(values)
Out[103]:
'foo|bar|baz'
In [104]:
s = "foo    bar  baz      42"
In [105]:
s.split()
Out[105]:
['foo', 'bar', 'baz', '42']
In [106]:
s.split(" ")
Out[106]:
['foo', '', '', '', 'bar', '', 'baz', '', '', '', '', '', '42']
In [108]:
";".join(values)
Out[108]:
'foo;bar;baz'
In [109]:
";;;".join(values)
Out[109]:
'foo;;;bar;;;baz'
In [110]:
"foobar".islower()
Out[110]:
True
In [111]:
"foobar".isupper()
Out[111]:
False
In [112]:
"hello world".capitalize()
Out[112]:
'Hello world'

Sets

In [113]:
s = set()
In [114]:
s
Out[114]:
set()
In [115]:
s.add(5)
In [116]:
s.add(7)
In [117]:
s.add(10)
In [118]:
s
Out[118]:
{5, 7, 10}
In [119]:
s.add(5)
In [120]:
s
Out[120]:
{5, 7, 10}
In [121]:
s1 = set([1,2,3])
In [122]:
s2 = set([1,2,4])
In [123]:
s1 | s2
Out[123]:
{1, 2, 3, 4}
In [124]:
s1 ^ s2
Out[124]:
{3, 4}
In [125]:
s1 & s2
Out[125]:
{1, 2}
In [126]:
s1 - s2
Out[126]:
{3}
In [127]:
if 1 in s1:
    print "FOO"
FOO

In [128]:
l1 = [2,3,4,5,6,7,1]
In [129]:
if 1 in l1:
    print "FOO"
FOO

Dictionaries

In [130]:
d = {}
In [131]:
d["A"] = 4.0
In [132]:
d["A-"] = 3.7
In [133]:
d["B+"] = 3.3
In [134]:
d
Out[134]:
{'A': 4.0, 'A-': 3.7, 'B+': 3.3}
In [135]:
d2 = {'A': 4.0, 'A-': 3.7, 'B+': 3.3}
In [136]:
d3 = {"ten":10, "twenty":20, "thirty":30}
In [137]:
d["A"]
Out[137]:
4.0
In [138]:
d3["forty"]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-138-11a1109fa28e> in <module>()
----> 1 d3["forty"]

KeyError: 'forty'
In [139]:
d3.has_key("forty")
Out[139]:
False
In [140]:
d3.has_key("thirty")
Out[140]:
True
In [141]:
d.keys()
Out[141]:
['A', 'A-', 'B+']
In [142]:
d3.keys()
Out[142]:
['twenty', 'thirty', 'ten']
In [143]:
d3.values()
Out[143]:
[20, 30, 10]
In [144]:
d3.items()
Out[144]:
[('twenty', 20), ('thirty', 30), ('ten', 10)]
In [145]:
for k,v in d3.items():
    print k,v
twenty 20
thirty 30
ten 10

Twitter Example

In [148]:
l = workshop.get_tweets(TOPTWEETS_FILE, 10)
In [149]:
len(l)
Out[149]:
10
In [150]:
l[0]
Out[150]:
{u'contributors': None,
 u'coordinates': None,
 u'created_at': u'Tue Nov 04 06:58:34 +0000 2014',
 u'entities': {u'hashtags': [],
  u'symbols': [],
  u'urls': [],
  u'user_mentions': []},
 u'favorite_count': 7557,
 u'favorited': False,
 u'geo': None,
 u'id': 529528109377519616,
 u'id_str': u'529528109377519616',
 u'in_reply_to_screen_name': None,
 u'in_reply_to_status_id': None,
 u'in_reply_to_status_id_str': None,
 u'in_reply_to_user_id': None,
 u'in_reply_to_user_id_str': None,
 u'lang': u'en',
 u'place': None,
 u'retweet_count': 6493,
 u'retweeted': False,
 u'source': u'<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
 u'text': u'rt if u used to wear love spell',
 u'truncated': False,
 u'user': {u'contributors_enabled': False,
  u'created_at': u'Fri Feb 20 23:45:56 +0000 2009',
  u'default_profile': False,
  u'default_profile_image': False,
  u'description': u'CURRENTLY\u2728BEAMING\u2728ON THE PRISMATIC WORLD TOUR 2014!',
  u'entities': {u'description': {u'urls': []},
   u'url': {u'urls': [{u'display_url': u'katyperry.com',
      u'expanded_url': u'http://www.katyperry.com',
      u'indices': [0, 22],
      u'url': u'http://t.co/TUWZkUWWhw'}]}},
  u'favourites_count': 1184,
  u'follow_request_sent': False,
  u'followers_count': 59396644,
  u'following': False,
  u'friends_count': 158,
  u'geo_enabled': False,
  u'id': 21447363,
  u'id_str': u'21447363',
  u'is_translation_enabled': True,
  u'is_translator': False,
  u'lang': u'en',
  u'listed_count': 143473,
  u'location': u'',
  u'name': u'KATY PERRY ',
  u'notifications': False,
  u'profile_background_color': u'CECFBC',
  u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/378800000168797027/kSZ-ewZo.jpeg',
  u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/378800000168797027/kSZ-ewZo.jpeg',
  u'profile_background_tile': False,
  u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/21447363/1401576937',
  u'profile_image_url': u'http://pbs.twimg.com/profile_images/423542935368380416/ryEG2fNO_normal.jpeg',
  u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/423542935368380416/ryEG2fNO_normal.jpeg',
  u'profile_link_color': u'D55732',
  u'profile_location': None,
  u'profile_sidebar_border_color': u'FFFFFF',
  u'profile_sidebar_fill_color': u'78C0A8',
  u'profile_text_color': u'5E412F',
  u'profile_use_background_image': True,
  u'protected': False,
  u'screen_name': u'katyperry',
  u'statuses_count': 6158,
  u'time_zone': u'Alaska',
  u'url': u'http://t.co/TUWZkUWWhw',
  u'utc_offset': -32400,
  u'verified': True}}
In [151]:
tweet = l[0]
In [155]:
tweet["text"]
Out[155]:
u'rt if u used to wear love spell'
In [156]:
tweet["retweet_count"]
Out[156]:
6493
In [158]:
tweet["user"]["screen_name"]
Out[158]:
u'katyperry'
In [160]:
tweet["user"]["name"]
Out[160]:
u'KATY PERRY '
In [173]:
# Histogram of tweet lengths: maps len(tweet["text"]) -> number of tweets
# whose text has that length.
# NOTE(review): the second argument to workshop.tweets() is presumably the
# number of tweets to read -- confirm in workshop.py.
lengths = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 1000):
    # Named text_length rather than l so that the list bound to l in an
    # earlier cell is not silently replaced by an integer.
    text_length = len(tweet["text"])
    # dict.get() supplies 0 for a length seen for the first time; this
    # replaces dict.has_key(), which no longer exists in Python 3.
    lengths[text_length] = lengths.get(text_length, 0) + 1

# Sort explicitly: dict iteration order is an implementation detail, so the
# ascending listing should not depend on how ints happen to hash.
for k,v in sorted(lengths.items()):
    # Single pre-formatted string: prints "k v" under Python 2 and Python 3.
    print("%s %s" % (k, v))
1 1
2 1
12 4
13 2
14 4
15 4
16 4
17 8
18 3
19 5
20 11
21 5
22 9
23 7
24 4
25 7
26 7
27 5
28 11
29 6
30 6
31 4
32 3
33 5
34 7
35 4
36 3
37 7
38 4
39 4
40 4
41 4
42 4
43 6
44 5
46 4
47 5
48 5
49 4
50 7
51 6
52 5
53 4
54 10
55 7
56 4
57 1
58 8
59 4
60 6
61 3
62 6
63 6
64 5
65 3
66 3
67 4
68 4
69 11
70 6
71 7
72 11
73 2
74 9
75 5
76 6
77 8
78 7
79 6
80 10
81 8
82 10
83 10
84 8
85 8
86 6
87 13
88 8
89 4
90 8
91 10
92 4
93 9
94 17
95 9
96 10
97 8
98 16
99 10
100 10
101 7
102 12
103 19
104 7
105 6
106 10
107 11
108 8
109 10
110 8
111 12
112 4
113 10
114 11
115 6
116 7
117 10
118 8
119 7
120 7
121 7
122 10
123 13
124 5
125 16
126 8
127 10
128 7
129 7
130 8
131 9
132 8
133 12
134 6
135 11
136 7
137 18
138 10
139 16
140 66

In [170]:
l = workshop.get_tweets(TOPTWEETS_FILE, 100)
In [172]:
l[97]["entities"]["hashtags"]
Out[172]:
[{u'indices': [0, 4], u'text': u'FBF'},
 {u'indices': [125, 139], u'text': u'ThisIsHowWeDo'}]
In [169]:
workshop.get_hashtags(l[97])
Out[169]:
[u'FBF', u'ThisIsHowWeDo']
In [192]:
import operator

# Count how often each hashtag occurs: maps hashtag text -> number of
# occurrences across the tweets yielded by workshop.tweets().
# NOTE(review): 20000 is presumably the number of tweets to read -- confirm
# in workshop.py.
hashtags = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 20000):
    hl = workshop.get_hashtags(tweet)
    for h in hl:
        # dict.get() supplies 0 for a hashtag seen for the first time; this
        # replaces dict.has_key(), which no longer exists in Python 3.
        hashtags[h] = hashtags.get(h, 0) + 1

# sorted() returns a new list of (hashtag, count) pairs ordered by count,
# highest first. Unlike calling .sort() on hashtags.items(), it does not
# require items() to return a list. The sort is stable, so hashtags with
# equal counts keep the relative order in which the dict yields them.
hashtags_freq = sorted(hashtags.items(), key=operator.itemgetter(1), reverse=True)

# Show the ten most frequent hashtags.
for k,v in hashtags_freq[:10]:
    # Single pre-formatted string: prints "k v" under Python 2 and Python 3.
    print("%s %s" % (k, v))
TS1989 137
taylurking 107
XTheAlbum 103
ANTM 98
WeAreHere 83
GALAXYNote4 83
RMLive 72
KKHamptons 71
HalaMadrid 67
FCBlive 66

In [194]:
dict([(x,str(x)) for x in range(10)])
Out[194]:
{0: '0',
 1: '1',
 2: '2',
 3: '3',
 4: '4',
 5: '5',
 6: '6',
 7: '7',
 8: '8',
 9: '9'}

Functions

In [195]:
def multiply(a,b):
    """Return ``a * b``."""
    product = a * b
    return product
In [196]:
multiply(5,4)
Out[196]:
20
In [197]:
def incr(l):
    """Return a new list holding every element of ``l`` increased by one.

    ``l`` itself is left unmodified.
    """
    bumped = []
    for item in l:
        bumped.append(item + 1)
    return bumped
In [198]:
l = [1,2,3,4,5]
In [199]:
incr(l)
Out[199]:
[2, 3, 4, 5, 6]
In [203]:
def incr(l, by=1):
    """Return a new list holding every element of ``l`` increased by ``by``.

    ``by`` defaults to 1. ``l`` itself is left unmodified.
    """
    def shift(item):
        return item + by

    return list(map(shift, l))
In [206]:
incr(l, by=5)
Out[206]:
[6, 7, 8, 9, 10]
In [207]:
def minusplus(x):
    """Return the tuple ``(x - 1, x + 1)``."""
    below = x - 1
    above = x + 1
    return (below, above)
In [208]:
minusplus(5)
Out[208]:
(4, 6)
In [210]:
x, y = minusplus(5)
In [211]:
x
Out[211]:
4
In [212]:
y
Out[212]:
6

Twitter Example

In [213]:
import functions
In [216]:
functions.extract_lengths(TOPTWEETS_FILE, 100)
Out[216]:
[31,
 51,
 53,
 79,
 25,
 74,
 66,
 14,
 140,
 93,
 40,
 67,
 39,
 123,
 34,
 30,
 125,
 97,
 83,
 140,
 111,
 133,
 53,
 139,
 84,
 77,
 75,
 118,
 70,
 107,
 111,
 97,
 83,
 43,
 140,
 69,
 138,
 140,
 124,
 138,
 125,
 59,
 81,
 29,
 81,
 129,
 104,
 139,
 46,
 33,
 21,
 109,
 100,
 113,
 41,
 139,
 66,
 140,
 79,
 69,
 134,
 50,
 135,
 126,
 102,
 85,
 84,
 134,
 126,
 51,
 139,
 140,
 114,
 125,
 140,
 140,
 132,
 97,
 140,
 125,
 140,
 33,
 130,
 135,
 139,
 27,
 101,
 125,
 131,
 131,
 110,
 102,
 54,
 128,
 42,
 76,
 36,
 139,
 94,
 131]

Simple Visualizations

In [217]:
#IPython Notebook-specific
%matplotlib inline
In [218]:
import matplotlib.pylab as plt
In [219]:
ys = [10,5,7,20,-5,7]
In [220]:
plt.plot(ys)
Out[220]:
[<matplotlib.lines.Line2D at 0x7f537d9f1b50>]
In [221]:
xs=[1,2,7,8,12,50]
In [222]:
plt.plot(xs,ys)
Out[222]:
[<matplotlib.lines.Line2D at 0x7f537d98ac90>]
In [224]:
plt.xlabel("Some x values")
plt.ylabel("Some y values")
plt.plot(xs,ys)
Out[224]:
[<matplotlib.lines.Line2D at 0x7f537d7abbd0>]
In [225]:
import random

# Seed the generator so that re-running the notebook from a fresh kernel
# draws the same points (and the same values from every later call into the
# random module). The value 42 is arbitrary; any fixed seed works.
random.seed(42)

# 100 x and 100 y coordinates, each drawn uniformly between -10 and 10.
random_xs = [random.uniform(-10,10) for x in range(100)]
random_ys = [random.uniform(-10,10) for x in range(100)]
In [229]:
plt.plot(random_xs, random_ys, "+", color="red")
Out[229]:
[<matplotlib.lines.Line2D at 0x7f537d5bc690>]
In [230]:
norm_values = [random.normalvariate(10,3) for x in range(10000)]
In [232]:
h = plt.hist(norm_values, bins=100)
In [240]:
import math

# 400 sample points: range(-200, 200) scaled by 0.1, i.e. roughly -20.0 up to
# 19.9 in steps of 0.1. Note that this rebinds xs, replacing the short list
# assigned to xs in an earlier plotting cell.
xs = [x*0.1 for x in range(-200,200)]
# sin and cos evaluated at every sample point (math.sin/math.cos take radians).
sin_ys = [math.sin(x) for x in xs]
cos_ys = [math.cos(x) for x in xs]

# Both curves are plotted against the same xs; the label= text is what
# plt.legend() displays for each line.
plt.plot(xs, sin_ys, color="red", label="sin(x)")
plt.plot(xs, cos_ys, color="blue", label="cos(x)")
# Fix the y-axis to [-2, 2]; the plotted values themselves lie in [-1, 1].
plt.ylim(-2, 2)
# Last expression of the cell, so the Legend object is echoed as the output.
plt.legend()
Out[240]:
<matplotlib.legend.Legend at 0x7f537cffa7d0>
In [241]:
from exercise_functions import extract_values
In [246]:
top_retweets = extract_values(TOPTWEETS_FILE, 20000, "retweet_count")
In [249]:
h = plt.hist(top_retweets, bins=50, log=True)
In []: