Introduction to Python

%autosave 5

Autosaving every 5 seconds

Lists

lang = ["C", "C++", "Python", "Java"]

nums = [3,187,1232,53,21398]

print lang

['C', 'C++', 'Python', 'Java']

print nums

[3, 187, 1232, 53, 21398]

l = ["foobar", 4, 5.0, True]

lang[2]

'Python'

lang[4]

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-9-e83aaddb2283> in <module>()
----> 1 lang[4]

IndexError: list index out of range

lang[-1]

'Java'

lang[-2]

'Python'

lang[0:2]

['C', 'C++']

lang[0:4:2]

['C', 'Python']

len(lang)

4

range(10)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

range(0,10,2)

[0, 2, 4, 6, 8]

range(4,8)

[4, 5, 6, 7]

range(len(lang))

[0, 1, 2, 3]

# Not Pythonic: loops over indices instead of iterating over the list directly
for i in range(len(lang)):
    print lang[i]

C
C++
Python
Java

for l in lang:
    print l

C
C++
Python
Java

lang.append("Scheme")

lang

['C', 'C++', 'Python', 'Java', 'Scheme']

lang2 = ["Pascal", "FORTRAN"]

lang + lang2

['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']

lang.extend(lang2)

lang

['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']

lang.insert(2, "Haskell")

lang

['C', 'C++', 'Haskell', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']

del lang[2]

lang

['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal', 'FORTRAN']

lang.pop()

'FORTRAN'

lang

['C', 'C++', 'Python', 'Java', 'Scheme', 'Pascal']

lang.pop(2)

'Python'

nums

[3, 187, 1232, 53, 21398]

max(nums)

21398

min(nums)

3

lang.count("Java")

1

lang.reverse()

lang

['Pascal', 'Scheme', 'Java', 'C++', 'C']

lang.sort()

lang

['C', 'C++', 'Java', 'Pascal', 'Scheme']

help(lang)

Help on list object:

class list(object)
 |  list() -> new empty list
 |  list(iterable) -> new list initialized from iterable's items
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __delitem__(...)
 |      x.__delitem__(y) <==> del x[y]
 |  
 |  __delslice__(...)
 |      x.__delslice__(i, j) <==> del x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __iadd__(...)
 |      x.__iadd__(y) <==> x+=y
 |  
 |  __imul__(...)
 |      x.__imul__(y) <==> x*=y
 |  
 |  __init__(...)
 |      x.__init__(...) initializes x; see help(type(x)) for signature
 |  
 |  __iter__(...)
 |      x.__iter__() <==> iter(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __reversed__(...)
 |      L.__reversed__() -- return a reverse iterator over the list
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __setitem__(...)
 |      x.__setitem__(i, y) <==> x[i]=y
 |  
 |  __setslice__(...)
 |      x.__setslice__(i, j, y) <==> x[i:j]=y
 |      
 |      Use  of negative indices is not supported.
 |  
 |  __sizeof__(...)
 |      L.__sizeof__() -- size of L in memory, in bytes
 |  
 |  append(...)
 |      L.append(object) -- append object to end
 |  
 |  count(...)
 |      L.count(value) -> integer -- return number of occurrences of value
 |  
 |  extend(...)
 |      L.extend(iterable) -- extend list by appending elements from the iterable
 |  
 |  index(...)
 |      L.index(value, [start, [stop]]) -> integer -- return first index of value.
 |      Raises ValueError if the value is not present.
 |  
 |  insert(...)
 |      L.insert(index, object) -- insert object before index
 |  
 |  pop(...)
 |      L.pop([index]) -> item -- remove and return item at index (default last).
 |      Raises IndexError if list is empty or index is out of range.
 |  
 |  remove(...)
 |      L.remove(value) -- remove first occurrence of value.
 |      Raises ValueError if the value is not present.
 |  
 |  reverse(...)
 |      L.reverse() -- reverse *IN PLACE*
 |  
 |  sort(...)
 |      L.sort(cmp=None, key=None, reverse=False) -- stable sort *IN PLACE*;
 |      cmp(x, y) -> -1, 0, 1
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __hash__ = None
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T

Twitter Example

import workshop

TOPTWEETS_FILE = "top_tweets.json"
TWEETS_FILE = "tweets.json"

workshop.get_tweets_text(TOPTWEETS_FILE, 10)

[u'rt if u used to wear love spell',
 u'TWO SUMMERS IN ONE YEAR?! THANKS AUSTRALIA!\U0001f334\u2600\ufe0f\U0001f357\U0001f385\u2600\ufe0f\U0001f334',
 u'\U0001f64f heaven is a place on earth \U0001f64f http://t.co/BcKOCys75p',
 u"The more fuzzy stuff you wear the more he'll think you're an actual teddy bear.",
 u'hi http://t.co/ihlaO8XyUF',
 u'RT @WORLDSTARVlNE: OMG @katyperry HALLOWEEN COSTUME http://t.co/lN7y2BMGGJ',
 u'RT @WorIdStarComedy: Katy Perry the realest http://t.co/l9zTSk6egI',
 u'\U0001f383whore-o-ween\U0001f383',
 u'Lay me down on ur flames @ChesterCheetah: @katyperry If u think u can make me fall in \u2764\ufe0f by dressing as a Flamin Hot Cheeto, ur 100% correct',
 u'*WIPES CHEETO DUST OFF SHOULDERS* \u201c@FLApfkp: u da real mvp @katyperry http://t.co/9PGvGxEyvR\u201d']

tweets = workshop.get_tweets_text(TOPTWEETS_FILE, 10)

len(tweets)

10

tweets[0]

u'rt if u used to wear love spell'

for t in tweets:
    print len(t)

31
51
53
79
25
74
66
14
140
93

import math

math.sqrt(2)

1.4142135623730951

List Comprehensions

l = [100,200,300,400,500]

l2 = []

for x in l:
    l2.append(x+1)

l2

[101, 201, 301, 401, 501]

[x for x in l]

[100, 200, 300, 400, 500]

[x+1 for x in l]

[101, 201, 301, 401, 501]

l3 = [1,3,4,7,6,10,11]

l2 = []
for x in l3:
    if x%2 == 0:
        l2.append(x)

l2

[4, 6, 10]

[x for x in l3 if x%2==0]

[4, 6, 10]

l1 = [1,2,3,4,5]
l2 = [100,200,300,400,500]

l3 = []
# Not Pythonic: index-based loop over two lists of equal length
for i in range(len(l1)):
    l3.append(l1[i] + l2[i])

l3

[101, 202, 303, 404, 505]

zl = zip(l1,l2)

zl

[(1, 100), (2, 200), (3, 300), (4, 400), (5, 500)]

for x, y in zl:
    print x, y

1 100
2 200
3 300
4 400
5 500

l3 = []
for x, y in zip(l1,l2):
    l3.append(x+y)

l3

[101, 202, 303, 404, 505]

[x+y for x,y in zip(l1,l2)]

[101, 202, 303, 404, 505]

Tuples

a = [3,5]

b = (10,42)

len(b)

2

min(b)

10

max(b)

42

b[0]

10

a[0] = 55

a

[55, 5]

b[0] = 37

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-87-d829f3437425> in <module>()
----> 1 b[0] = 37

TypeError: 'tuple' object does not support item assignment

b.append(10)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-88-37a3cd709306> in <module>()
----> 1 b.append(10)

AttributeError: 'tuple' object has no attribute 'append'

Strings

s1 = "foobar"

s2 = "foobar"

len(s1)

6

s1[3]

'b'

s1[1:5]

'ooba'

s1 == s2

True

help(str)

Help on class str in module __builtin__:

class str(basestring)
 |  str(object='') -> string
 |  
 |  Return a nice string representation of the object.
 |  If the argument is a string, the return value is the same object.
 |  
 |  Method resolution order:
 |      str
 |      basestring
 |      object
 |  
 |  Methods defined here:
 |  
 |  __add__(...)
 |      x.__add__(y) <==> x+y
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x
 |  
 |  __eq__(...)
 |      x.__eq__(y) <==> x==y
 |  
 |  __format__(...)
 |      S.__format__(format_spec) -> string
 |      
 |      Return a formatted version of S as described by format_spec.
 |  
 |  __ge__(...)
 |      x.__ge__(y) <==> x>=y
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __getnewargs__(...)
 |  
 |  __getslice__(...)
 |      x.__getslice__(i, j) <==> x[i:j]
 |      
 |      Use of negative indices is not supported.
 |  
 |  __gt__(...)
 |      x.__gt__(y) <==> x>y
 |  
 |  __hash__(...)
 |      x.__hash__() <==> hash(x)
 |  
 |  __le__(...)
 |      x.__le__(y) <==> x<=y
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __lt__(...)
 |      x.__lt__(y) <==> x<y
 |  
 |  __mod__(...)
 |      x.__mod__(y) <==> x%y
 |  
 |  __mul__(...)
 |      x.__mul__(n) <==> x*n
 |  
 |  __ne__(...)
 |      x.__ne__(y) <==> x!=y
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __rmod__(...)
 |      x.__rmod__(y) <==> y%x
 |  
 |  __rmul__(...)
 |      x.__rmul__(n) <==> n*x
 |  
 |  __sizeof__(...)
 |      S.__sizeof__() -> size of S in memory, in bytes
 |  
 |  __str__(...)
 |      x.__str__() <==> str(x)
 |  
 |  capitalize(...)
 |      S.capitalize() -> string
 |      
 |      Return a copy of the string S with only its first character
 |      capitalized.
 |  
 |  center(...)
 |      S.center(width[, fillchar]) -> string
 |      
 |      Return S centered in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  count(...)
 |      S.count(sub[, start[, end]]) -> int
 |      
 |      Return the number of non-overlapping occurrences of substring sub in
 |      string S[start:end].  Optional arguments start and end are interpreted
 |      as in slice notation.
 |  
 |  decode(...)
 |      S.decode([encoding[,errors]]) -> object
 |      
 |      Decodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
 |      as well as any other name registered with codecs.register_error that is
 |      able to handle UnicodeDecodeErrors.
 |  
 |  encode(...)
 |      S.encode([encoding[,errors]]) -> object
 |      
 |      Encodes S using the codec registered for encoding. encoding defaults
 |      to the default encoding. errors may be given to set a different error
 |      handling scheme. Default is 'strict' meaning that encoding errors raise
 |      a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
 |      'xmlcharrefreplace' as well as any other name registered with
 |      codecs.register_error that is able to handle UnicodeEncodeErrors.
 |  
 |  endswith(...)
 |      S.endswith(suffix[, start[, end]]) -> bool
 |      
 |      Return True if S ends with the specified suffix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      suffix can also be a tuple of strings to try.
 |  
 |  expandtabs(...)
 |      S.expandtabs([tabsize]) -> string
 |      
 |      Return a copy of S where all tab characters are expanded using spaces.
 |      If tabsize is not given, a tab size of 8 characters is assumed.
 |  
 |  find(...)
 |      S.find(sub [,start [,end]]) -> int
 |      
 |      Return the lowest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  format(...)
 |      S.format(*args, **kwargs) -> string
 |      
 |      Return a formatted version of S, using substitutions from args and kwargs.
 |      The substitutions are identified by braces ('{' and '}').
 |  
 |  index(...)
 |      S.index(sub [,start [,end]]) -> int
 |      
 |      Like S.find() but raise ValueError when the substring is not found.
 |  
 |  isalnum(...)
 |      S.isalnum() -> bool
 |      
 |      Return True if all characters in S are alphanumeric
 |      and there is at least one character in S, False otherwise.
 |  
 |  isalpha(...)
 |      S.isalpha() -> bool
 |      
 |      Return True if all characters in S are alphabetic
 |      and there is at least one character in S, False otherwise.
 |  
 |  isdigit(...)
 |      S.isdigit() -> bool
 |      
 |      Return True if all characters in S are digits
 |      and there is at least one character in S, False otherwise.
 |  
 |  islower(...)
 |      S.islower() -> bool
 |      
 |      Return True if all cased characters in S are lowercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  isspace(...)
 |      S.isspace() -> bool
 |      
 |      Return True if all characters in S are whitespace
 |      and there is at least one character in S, False otherwise.
 |  
 |  istitle(...)
 |      S.istitle() -> bool
 |      
 |      Return True if S is a titlecased string and there is at least one
 |      character in S, i.e. uppercase characters may only follow uncased
 |      characters and lowercase characters only cased ones. Return False
 |      otherwise.
 |  
 |  isupper(...)
 |      S.isupper() -> bool
 |      
 |      Return True if all cased characters in S are uppercase and there is
 |      at least one cased character in S, False otherwise.
 |  
 |  join(...)
 |      S.join(iterable) -> string
 |      
 |      Return a string which is the concatenation of the strings in the
 |      iterable.  The separator between elements is S.
 |  
 |  ljust(...)
 |      S.ljust(width[, fillchar]) -> string
 |      
 |      Return S left-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space).
 |  
 |  lower(...)
 |      S.lower() -> string
 |      
 |      Return a copy of the string S converted to lowercase.
 |  
 |  lstrip(...)
 |      S.lstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  partition(...)
 |      S.partition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, and return the part before it,
 |      the separator itself, and the part after it.  If the separator is not
 |      found, return S and two empty strings.
 |  
 |  replace(...)
 |      S.replace(old, new[, count]) -> string
 |      
 |      Return a copy of string S with all occurrences of substring
 |      old replaced by new.  If the optional argument count is
 |      given, only the first count occurrences are replaced.
 |  
 |  rfind(...)
 |      S.rfind(sub [,start [,end]]) -> int
 |      
 |      Return the highest index in S where substring sub is found,
 |      such that sub is contained within S[start:end].  Optional
 |      arguments start and end are interpreted as in slice notation.
 |      
 |      Return -1 on failure.
 |  
 |  rindex(...)
 |      S.rindex(sub [,start [,end]]) -> int
 |      
 |      Like S.rfind() but raise ValueError when the substring is not found.
 |  
 |  rjust(...)
 |      S.rjust(width[, fillchar]) -> string
 |      
 |      Return S right-justified in a string of length width. Padding is
 |      done using the specified fill character (default is a space)
 |  
 |  rpartition(...)
 |      S.rpartition(sep) -> (head, sep, tail)
 |      
 |      Search for the separator sep in S, starting at the end of S, and return
 |      the part before it, the separator itself, and the part after it.  If the
 |      separator is not found, return two empty strings and S.
 |  
 |  rsplit(...)
 |      S.rsplit([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string, starting at the end of the string and working
 |      to the front.  If maxsplit is given, at most maxsplit splits are
 |      done. If sep is not specified or is None, any whitespace string
 |      is a separator.
 |  
 |  rstrip(...)
 |      S.rstrip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with trailing whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  split(...)
 |      S.split([sep [,maxsplit]]) -> list of strings
 |      
 |      Return a list of the words in the string S, using sep as the
 |      delimiter string.  If maxsplit is given, at most maxsplit
 |      splits are done. If sep is not specified or is None, any
 |      whitespace string is a separator and empty strings are removed
 |      from the result.
 |  
 |  splitlines(...)
 |      S.splitlines(keepends=False) -> list of strings
 |      
 |      Return a list of the lines in S, breaking at line boundaries.
 |      Line breaks are not included in the resulting list unless keepends
 |      is given and true.
 |  
 |  startswith(...)
 |      S.startswith(prefix[, start[, end]]) -> bool
 |      
 |      Return True if S starts with the specified prefix, False otherwise.
 |      With optional start, test S beginning at that position.
 |      With optional end, stop comparing S at that position.
 |      prefix can also be a tuple of strings to try.
 |  
 |  strip(...)
 |      S.strip([chars]) -> string or unicode
 |      
 |      Return a copy of the string S with leading and trailing
 |      whitespace removed.
 |      If chars is given and not None, remove characters in chars instead.
 |      If chars is unicode, S will be converted to unicode before stripping
 |  
 |  swapcase(...)
 |      S.swapcase() -> string
 |      
 |      Return a copy of the string S with uppercase characters
 |      converted to lowercase and vice versa.
 |  
 |  title(...)
 |      S.title() -> string
 |      
 |      Return a titlecased version of S, i.e. words start with uppercase
 |      characters, all remaining cased characters have lowercase.
 |  
 |  translate(...)
 |      S.translate(table [,deletechars]) -> string
 |      
 |      Return a copy of the string S, where all characters occurring
 |      in the optional argument deletechars are removed, and the
 |      remaining characters have been mapped through the given
 |      translation table, which must be a string of length 256 or None.
 |      If the table argument is None, no translation is applied and
 |      the operation simply removes the characters in deletechars.
 |  
 |  upper(...)
 |      S.upper() -> string
 |      
 |      Return a copy of the string S converted to uppercase.
 |  
 |  zfill(...)
 |      S.zfill(width) -> string
 |      
 |      Pad a numeric string S with zeros on the left, to fill a field
 |      of the specified width.  The string S is never truncated.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T

s1.find("oo")

1

csv="foo,bar,baz"

values = csv.split(",")

values

['foo', 'bar', 'baz']

sep = "|"

sep.join(values)

'foo|bar|baz'

s = "foo    bar  baz      42"

s.split()

['foo', 'bar', 'baz', '42']

s.split(" ")

['foo', '', '', '', 'bar', '', 'baz', '', '', '', '', '', '42']

";".join(values)

'foo;bar;baz'

";;;".join(values)

'foo;;;bar;;;baz'

"foobar".islower()

True

"foobar".isupper()

False

"hello world".capitalize()

'Hello world'

Sets

s = set()

s

set()

s.add(5)

s.add(7)

s.add(10)

s

{5, 7, 10}

s.add(5)

s

{5, 7, 10}

s1 = set([1,2,3])

s2 = set([1,2,4])

s1 | s2

{1, 2, 3, 4}

s1 ^ s2

{3, 4}

s1 & s2

{1, 2}

s1 - s2

{3}

if 1 in s1:
    print "FOO"

FOO

l1 = [2,3,4,5,6,7,1]

if 1 in l1:
    print "FOO"

FOO

Dictionaries

d = {}

d["A"] = 4.0

d["A-"] = 3.7

d["B+"] = 3.3

d

{'A': 4.0, 'A-': 3.7, 'B+': 3.3}

d2 = {'A': 4.0, 'A-': 3.7, 'B+': 3.3}

d3 = {"ten":10, "twenty":20, "thirty":30}

d["A"]

4.0

d3["forty"]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-138-11a1109fa28e> in <module>()
----> 1 d3["forty"]

KeyError: 'forty'

d3.has_key("forty")

False

d3.has_key("thirty")

True

d.keys()

['A', 'A-', 'B+']

d3.keys()

['twenty', 'thirty', 'ten']

d3.values()

[20, 30, 10]

d3.items()

[('twenty', 20), ('thirty', 30), ('ten', 10)]

for k,v in d3.items():
    print k,v

twenty 20
thirty 30
ten 10

Twitter Example

l = workshop.get_tweets(TOPTWEETS_FILE, 10)

len(l)

10

l[0]

{u'contributors': None,
 u'coordinates': None,
 u'created_at': u'Tue Nov 04 06:58:34 +0000 2014',
 u'entities': {u'hashtags': [],
  u'symbols': [],
  u'urls': [],
  u'user_mentions': []},
 u'favorite_count': 7557,
 u'favorited': False,
 u'geo': None,
 u'id': 529528109377519616,
 u'id_str': u'529528109377519616',
 u'in_reply_to_screen_name': None,
 u'in_reply_to_status_id': None,
 u'in_reply_to_status_id_str': None,
 u'in_reply_to_user_id': None,
 u'in_reply_to_user_id_str': None,
 u'lang': u'en',
 u'place': None,
 u'retweet_count': 6493,
 u'retweeted': False,
 u'source': u'<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
 u'text': u'rt if u used to wear love spell',
 u'truncated': False,
 u'user': {u'contributors_enabled': False,
  u'created_at': u'Fri Feb 20 23:45:56 +0000 2009',
  u'default_profile': False,
  u'default_profile_image': False,
  u'description': u'CURRENTLY\u2728BEAMING\u2728ON THE PRISMATIC WORLD TOUR 2014!',
  u'entities': {u'description': {u'urls': []},
   u'url': {u'urls': [{u'display_url': u'katyperry.com',
      u'expanded_url': u'http://www.katyperry.com',
      u'indices': [0, 22],
      u'url': u'http://t.co/TUWZkUWWhw'}]}},
  u'favourites_count': 1184,
  u'follow_request_sent': False,
  u'followers_count': 59396644,
  u'following': False,
  u'friends_count': 158,
  u'geo_enabled': False,
  u'id': 21447363,
  u'id_str': u'21447363',
  u'is_translation_enabled': True,
  u'is_translator': False,
  u'lang': u'en',
  u'listed_count': 143473,
  u'location': u'',
  u'name': u'KATY PERRY ',
  u'notifications': False,
  u'profile_background_color': u'CECFBC',
  u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/378800000168797027/kSZ-ewZo.jpeg',
  u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/378800000168797027/kSZ-ewZo.jpeg',
  u'profile_background_tile': False,
  u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/21447363/1401576937',
  u'profile_image_url': u'http://pbs.twimg.com/profile_images/423542935368380416/ryEG2fNO_normal.jpeg',
  u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/423542935368380416/ryEG2fNO_normal.jpeg',
  u'profile_link_color': u'D55732',
  u'profile_location': None,
  u'profile_sidebar_border_color': u'FFFFFF',
  u'profile_sidebar_fill_color': u'78C0A8',
  u'profile_text_color': u'5E412F',
  u'profile_use_background_image': True,
  u'protected': False,
  u'screen_name': u'katyperry',
  u'statuses_count': 6158,
  u'time_zone': u'Alaska',
  u'url': u'http://t.co/TUWZkUWWhw',
  u'utc_offset': -32400,
  u'verified': True}}

tweet = l[0]

tweet["text"]

u'rt if u used to wear love spell'

tweet["retweet_count"]

6493

tweet["user"]["screen_name"]

u'katyperry'

tweet["user"]["name"]

u'KATY PERRY '

lengths = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 1000):
    l = len(tweet["text"])
    if not lengths.has_key(l):
        lengths[l] = 1
    else:
        lengths[l] = lengths[l] + 1
        
for k,v in lengths.items():
    print k,v

1 1
2 1
12 4
13 2
14 4
15 4
16 4
17 8
18 3
19 5
20 11
21 5
22 9
23 7
24 4
25 7
26 7
27 5
28 11
29 6
30 6
31 4
32 3
33 5
34 7
35 4
36 3
37 7
38 4
39 4
40 4
41 4
42 4
43 6
44 5
46 4
47 5
48 5
49 4
50 7
51 6
52 5
53 4
54 10
55 7
56 4
57 1
58 8
59 4
60 6
61 3
62 6
63 6
64 5
65 3
66 3
67 4
68 4
69 11
70 6
71 7
72 11
73 2
74 9
75 5
76 6
77 8
78 7
79 6
80 10
81 8
82 10
83 10
84 8
85 8
86 6
87 13
88 8
89 4
90 8
91 10
92 4
93 9
94 17
95 9
96 10
97 8
98 16
99 10
100 10
101 7
102 12
103 19
104 7
105 6
106 10
107 11
108 8
109 10
110 8
111 12
112 4
113 10
114 11
115 6
116 7
117 10
118 8
119 7
120 7
121 7
122 10
123 13
124 5
125 16
126 8
127 10
128 7
129 7
130 8
131 9
132 8
133 12
134 6
135 11
136 7
137 18
138 10
139 16
140 66

l = workshop.get_tweets(TOPTWEETS_FILE, 100)

l[97]["entities"]["hashtags"]

[{u'indices': [0, 4], u'text': u'FBF'},
 {u'indices': [125, 139], u'text': u'ThisIsHowWeDo'}]

workshop.get_hashtags(l[97])

[u'FBF', u'ThisIsHowWeDo']

import operator

# Tally how often each hashtag occurs in the tweets read from TOPTWEETS_FILE.
hashtags = {}
for tweet in workshop.tweets(TOPTWEETS_FILE, 20000):
    hl = workshop.get_hashtags(tweet)
    for h in hl:
        # get() yields 0 for a hashtag not seen before, so the deprecated
        # has_key() check and the if/else branches are unnecessary.
        hashtags[h] = hashtags.get(h, 0) + 1

# Order the (hashtag, count) pairs from the most to the least frequent.
hashtags_freq = sorted(hashtags.items(), key=operator.itemgetter(1), reverse=True)

for k,v in hashtags_freq[:10]:
    print k,v

TS1989 137
taylurking 107
XTheAlbum 103
ANTM 98
WeAreHere 83
GALAXYNote4 83
RMLive 72
KKHamptons 71
HalaMadrid 67
FCBlive 66

dict([(x,str(x)) for x in range(10)])

{0: '0',
 1: '1',
 2: '2',
 3: '3',
 4: '4',
 5: '5',
 6: '6',
 7: '7',
 8: '8',
 9: '9'}

Functions

def multiply(a, b):
    """Return the product of a and b."""
    product = a * b
    return product

multiply(5,4)

20

def incr(l):
    """Return a new list holding every element of l increased by one."""
    bumped = []
    for item in l:
        bumped.append(item + 1)
    return bumped

l = [1,2,3,4,5]

incr(l)

[2, 3, 4, 5, 6]

def incr(l, by=1):
    """Return a new list holding every element of l increased by `by`.

    `by` defaults to 1, so incr(l) still adds one to each element.
    """
    return list(item + by for item in l)

incr(l, by=5)

[6, 7, 8, 9, 10]

def minusplus(x):
    """Return the tuple (x - 1, x + 1)."""
    below = x - 1
    above = x + 1
    return below, above

minusplus(5)

(4, 6)

x, y = minusplus(5)

x

4

y

6

Twitter Example

import functions

functions.extract_lengths(TOPTWEETS_FILE, 100)

[31,
 51,
 53,
 79,
 25,
 74,
 66,
 14,
 140,
 93,
 40,
 67,
 39,
 123,
 34,
 30,
 125,
 97,
 83,
 140,
 111,
 133,
 53,
 139,
 84,
 77,
 75,
 118,
 70,
 107,
 111,
 97,
 83,
 43,
 140,
 69,
 138,
 140,
 124,
 138,
 125,
 59,
 81,
 29,
 81,
 129,
 104,
 139,
 46,
 33,
 21,
 109,
 100,
 113,
 41,
 139,
 66,
 140,
 79,
 69,
 134,
 50,
 135,
 126,
 102,
 85,
 84,
 134,
 126,
 51,
 139,
 140,
 114,
 125,
 140,
 140,
 132,
 97,
 140,
 125,
 140,
 33,
 130,
 135,
 139,
 27,
 101,
 125,
 131,
 131,
 110,
 102,
 54,
 128,
 42,
 76,
 36,
 139,
 94,
 131]

Simple Visualizations

# IPython Notebook-specific: the %matplotlib inline magic renders plots inside the notebook
%matplotlib inline

import matplotlib.pylab as plt

ys = [10,5,7,20,-5,7]

plt.plot(ys)

[<matplotlib.lines.Line2D at 0x7f537d9f1b50>]

xs=[1,2,7,8,12,50]

plt.plot(xs,ys)

[<matplotlib.lines.Line2D at 0x7f537d98ac90>]

plt.xlabel("Some x values")
plt.ylabel("Some y values")
plt.plot(xs,ys)

[<matplotlib.lines.Line2D at 0x7f537d7abbd0>]

import random
random_xs = [random.uniform(-10,10) for x in range(100)]
random_ys = [random.uniform(-10,10) for x in range(100)]

plt.plot(random_xs, random_ys, "+", color="red")

[<matplotlib.lines.Line2D at 0x7f537d5bc690>]

norm_values = [random.normalvariate(10,3) for x in range(10000)]

h = plt.hist(norm_values, bins=100)

import math

xs = [x*0.1 for x in range(-200,200)]
sin_ys = [math.sin(x) for x in xs]
cos_ys = [math.cos(x) for x in xs]

plt.plot(xs, sin_ys, color="red", label="sin(x)")
plt.plot(xs, cos_ys, color="blue", label="cos(x)")
plt.ylim(-2, 2)
plt.legend()

<matplotlib.legend.Legend at 0x7f537cffa7d0>

from exercise_functions import extract_values

top_retweets = extract_values(TOPTWEETS_FILE, 20000, "retweet_count")

h = plt.hist(top_retweets, bins=50, log=True)