Curso - Python For Data Science - IBM Cognitive Class¶

Formação: Applied Data Science With Python

Victor Hugo Negrisoli - Desenvolvedor de Software Full-Stack & Analista de Dados

Módulo 1 - Python Basics

In [ ]:
print("Hello\nWorld!")
In [9]:
type('5668.413')
Out[9]:
str
In [10]:
type(True)
Out[10]:
bool
In [4]:
 int(1.0)
Out[4]:
1
In [21]:
# Usar // irá dividir para um int
25 // 5
Out[21]:
5
In [28]:
30 * (2 * 60)
Out[28]:
3600
In [52]:
indices = "O índice"
len(indices)
Out[52]:
8
In [62]:
concat = " é top"
(3 * indices) + 2 * concat
Out[62]:
'O índiceO índiceO índice é top é top'
In [63]:
print("Teste pular\nlinha")
Teste pular
linha
In [61]:
print("Teste tab\tlinha")
Teste tab	linha
In [66]:
print("Uma barra\\ no meio da String!")
Uma barra\ no meio da String!
In [68]:
upper = "upper case?"
In [69]:
upper.upper()
Out[69]:
'UPPER CASE?'
In [71]:
upper.replace("upper", "LOWER")
Out[71]:
'LOWER case?'
In [75]:
upper.find("?")
Out[75]:
10

Módulo 2 - Python Data Structures

In [76]:
# Lists e Tuples
In [77]:
ratings = (1, 2, 3)
In [79]:
ratings[2]
Out[79]:
3
In [80]:
new_tuple = (3, 5)
In [81]:
concat_tuple = ratings + new_tuple
In [86]:
concat_tuple[2:4]
Out[86]:
(3, 3)
In [90]:
sorted(ratings)
Out[90]:
[1, 2, 3]
In [108]:
# Nesting (uma tuple dentro de uma tuple)
NT = (1, (2, 3), 4, 5, 6)
NT[1][0]
Out[108]:
2
In [120]:
# Lists
l = ["Teste", 1, 1.2]
l[0:3]
Out[120]:
['Teste', 1, 1.2]
In [122]:
l.extend([4, 5])
In [124]:
l.append(7)
In [126]:
l
Out[126]:
['Teste', 1, 1.2, 4, 5, 7]
In [129]:
l[0] = 156
In [131]:
del(l[0])
In [133]:
"Split;By;Delimiter".split(";")
Out[133]:
['Split', 'By', 'Delimiter']
In [136]:
# Copiar uma lista

A = [1,2,3,4,5]
B = A[:]
B
Out[136]:
[1, 2, 3, 4, 5]
In [138]:
B=["a","b","c"] 
B[1:]
Out[138]:
['b', 'c']
In [142]:
B[2]
Out[142]:
'c'
In [143]:
# Sets
In [151]:
set1 = {1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 7}
set1
set(A)
set1.add(41)
set1.remove(2)
set1
# Usar o IN para verificar se há um item no set
121 in set1
Out[151]:
False
In [152]:
set2 = {1,2,3,89,99}
In [162]:
set2.issubset(set2)
Out[162]:
True
In [165]:
S={'A','B','C'}
U={'A','Z','C'}
S & U
Out[165]:
{'A', 'C'}
In [166]:
S.union(U)
Out[166]:
{'A', 'B', 'C', 'Z'}
In [10]:
# Dictionaries

dict1 = {"Crisis": 1985, "Kingdom": 1996, "Identity": 2004}
"Crisis" in dict1
Out[10]:
True
In [11]:
dict1.values()
Out[11]:
dict_values([1985, 1996, 2004])

Módulo 3 - Python Programming Fundamentals

In [12]:
a = 6
a == 6
Out[12]:
True
In [20]:
age = 18
if (age > 18):
    print("Menor de idade")
elif(age == 18):
    print("Acabou de fazer 18")
else:
    print("Maior de idade")
Acabou de fazer 18
In [24]:
if (age > 18 and age <= 30 or age == 10):
    print("Age is {}".format(str(age)))
elif (age <= 5):
    print("Too young")
else:
    print("I don't know anymore")
I don't know anymore
In [27]:
list = range(1, 10)
for i in (list):
    print(str(i))
1
2
3
4
5
6
7
8
9
In [40]:
squares = ['red', 'yellow', 'green', 'purple', 'blue']

for i in range(len(squares)):
    squares[i] = 'white'
print(squares)

for square in squares:
    square = 'red'
    print(square)
    
for i, square in enumerate(squares):
    print("Index: {}, Square: {}".format(str(i), square))
['white', 'white', 'white', 'white', 'white']
red
red
red
red
red
Index: 0, Square: white
Index: 1, Square: white
Index: 2, Square: white
Index: 3, Square: white
Index: 4, Square: white
In [46]:
newSquares = []
i = 0
while (i < len(squares) and squares[i] == 'white'):
    newSquares.append(squares[i])
    i += 1
    
newSquares
Out[46]:
['white', 'white', 'white', 'white', 'white']
In [48]:
A=[3,4,5]

for a in A:
    print(a)
3
4
5
In [49]:
# Funções
def calculate(a, b):
    return a + b

calculate(1, 4)
Out[49]:
5
In [58]:
lista = [1, 8, 7, 5, 3, 2, 4, 8, 19]

for i in range(len(lista)):
    if (i < len(lista) - 1):
        print(str(calculate(lista[i], lista[i + 1])))
sum(lista)
9
15
12
8
5
6
12
27
Out[58]:
57
In [59]:
listaOrdenada = sorted(lista)
listaOrdenada
Out[59]:
[1, 2, 3, 4, 5, 7, 8, 8, 19]
In [63]:
lista.sort()
lista
Out[63]:
[1, 2, 3, 4, 5, 7, 8, 8, 19]
In [64]:
def ordenar(lista):
    return sorted(lista)
In [65]:
novaLista = [8, 9, 5, 1, 3, 4, 6]
print(ordenar(novaLista))
[1, 3, 4, 5, 6, 8, 9]
In [66]:
def printNames(*names):
    for name in names:
        print(name)
In [67]:
printNames("Nome 1", "Nome 2", "Nome 3", "Nome 4")
Nome 1
Nome 2
Nome 3
Nome 4
In [68]:
type(1)
Out[68]:
int
In [69]:
type(lista)
Out[69]:
list
In [71]:
type(a)
Out[71]:
int
In [94]:
class Circle(object):
    
    def __init__(self, radius = 10, color = 'red'):
        self.radius = radius
        self.color = color
    
    def addRadius(self, radius):
        self.radius = self.radius + radius
        
    def changeColor(self, color):
        self.color = color
    
class Rectangle(object):
    
    def __init__(self, color, height, width):
        self.color = color
        self.height = height
        self.width = width
In [95]:
redCircle = Circle(10, 'red')
redCircle.color = "green"
redCircle.color
Out[95]:
'green'
In [96]:
redCircle.addRadius(10)
redCircle.radius
Out[96]:
20
In [97]:
redCircle.changeColor('Blue')
redCircle.color
Out[97]:
'Blue'
In [99]:
dir(redCircle)
Out[99]:
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'addRadius',
 'changeColor',
 'color',
 'radius']
In [123]:
newCircle = Circle(radius = 20, color = 'red')
newCircle.color
Out[123]:
'red'

Módulo 4 - Working With Data In Python

In [213]:
file1 = open("dados/example1.txt", "w")
In [214]:
file1.name
Out[214]:
'dados/example1.txt'
In [215]:
file1.mode
print(file1.closed)
file1.write("File 1\nFile 2\nFile 3")
False
Out[215]:
20
In [216]:
with open("dados/example1.txt", "r") as file1:
    stuff = file1.read()
print(file1.closed)
print(stuff)
True
File 1
File 2
File 3
In [217]:
stuff
Out[217]:
'File 1\nFile 2\nFile 3'
In [218]:
with open("dados/example1.txt", "r") as file1:
    stuff = file1.readlines()
print(file1.closed)
print(stuff)
stuff
True
['File 1\n', 'File 2\n', 'File 3']
Out[218]:
['File 1\n', 'File 2\n', 'File 3']
In [219]:
with open("dados/example1.txt", "r") as file1:
    stuff = file1.readlines(14)
    print(stuff)
['File 1\n', 'File 2\n', 'File 3']
In [220]:
with open("dados/example2.txt", "w") as file2:
    file2.write("New file using With Open")

file2.closed
Out[220]:
True
In [221]:
lines = ["Linha 1\n", "Linha 2\n", "Linha 3\n", "Linha 4\n"]

with open("dados/example3.txt", "w") as file3:
    for line in lines:
        file3.write(line)
with open("dados/example3.txt", "r") as wfile3:
    writtenFile = wfile3.readlines()
writtenFile
Out[221]:
['Linha 1\n', 'Linha 2\n', 'Linha 3\n', 'Linha 4\n']
In [229]:
# Copiando os dados de um arquivo existente para outro arquivo, criando um example5.txt e copiando os dados de example4.txt
with open("dados/example3.txt", "r") as file3:
    with open("dados/example4.txt", "w") as file4:
        for line in file3:
            file4.write(line)

# Comparando os arquivos copiados
file3Str = ""
file4Str = ""

with open("dados/example3.txt", "r") as file3:
    file3Str = file3.readlines()

with open("dados/example4.txt", "r") as file4:
    file4Str = file4.readlines()

if (file3Str == file4Str):
    print("File3 and File4 are equals")
else:
    print("File3 and File4 are different")
File3 and File4 are equals
In [247]:
import pandas as pd

# Gerar um arquivo CSV para estudo
with open("dados/csv_data.csv", "w") as csvFile:
    csvFile.write("Id;Description;Data\n")
    csvFile.write("1;Car;206\n")
    csvFile.write("2;Bus;1511\n")
    csvFile.write("3;Bike;13\n")
    csvFile.write("4;Motorcycle;895\n")
    csvFile.write("5;Truck;332\n")
    csvFile.write("6;Plane;60\n")

df = pd.read_csv("dados/csv_data.csv", delimiter = ";")
df.head()
Out[247]:
Id Description Data
0 1 Car 206
1 2 Bus 1511
2 3 Bike 13
3 4 Motorcycle 895
4 5 Truck 332
In [248]:
# Criando um dataframe
dictionary = {
    'Id': [1, 2, 3, 4, 5],
    'Description': ['Car', 'Bus', 'Bike', 'Motorcycle', 'Truck'],
    'Data': [206, 1511, 13, 895, 332]
}
df2 = pd.DataFrame(dictionary)
df2.head()
Out[248]:
Id Description Data
0 1 Car 206
1 2 Bus 1511
2 3 Bike 13
3 4 Motorcycle 895
4 5 Truck 332
In [255]:
# Particionar partes de um DataFrame em outro

df3 = pd.DataFrame(df2[['Id', 'Description']])
df3.head()
Out[255]:
Id Description
0 1 Car
1 2 Bus
2 3 Bike
3 4 Motorcycle
4 5 Truck
In [294]:
item = df3.iloc[0, 1]
item
Out[294]:
'Car'
In [296]:
df4 = pd.DataFrame({
  'years': [1998, 1998, 1997, 1982, 2005, 2004, 2005, 2005, 2004, 2001]    
})
In [301]:
# Dados distintos, sem duplicatas
df4["years"].unique()
Out[301]:
array([1998, 1997, 1982, 2005, 2004, 2001], dtype=int64)
In [307]:
# Filtrando o DataFrame apenas pelos anos maiores que 2000
df4[df4["years"] > 2000]
Out[307]:
years
4 2005
5 2004
6 2005
7 2005
8 2004
9 2001
In [311]:
df3.to_csv("dados/data_frame.csv")
In [317]:
df5 = pd.DataFrame({'a':[1,2,1],'b':[1,1,1]}) 
df5['a'] == 1
Out[317]:
0     True
1    False
2     True
Name: a, dtype: bool

Módulo 5 - Working With NumPy Arrays

In [334]:
import numpy as np

# Arrays 1D - 1 Dimensão

a = np.array([1, 2, 3, 4, 5])
type(nparray)
Out[334]:
numpy.ndarray
In [335]:
a.size
Out[335]:
5
In [336]:
a.ndim
Out[336]:
1
In [337]:
a.shape
Out[337]:
(5,)
In [338]:
a.dtype
Out[338]:
dtype('int32')
In [339]:
f = np.array([1.5, 1.2, 5.3, 12.0, 14.1])
f.dtype
Out[339]:
dtype('float64')
In [340]:
f[0:2]
Out[340]:
array([1.5, 1.2])
In [348]:
f[0:2] = 85.4, 12.3
f[0:2]
Out[348]:
array([85.4, 12.3])
In [358]:
# Soma de dois vetores sem numpy
u = [0, 1]
v = [1, 0]
z = []

for i, j in zip(u, v):
    z.append(i + j)

print(z)
# Soma de dois vetores com numpy
    
u = np.array([0, 1])
v = np.array([1, 0])
z = np.array(u + v)

z
[1, 1]
Out[358]:
array([1, 1])
In [360]:
y = np.array([1, 2])
z = 2 * y
z
Out[360]:
array([2, 4])
In [398]:
z = np.dot(u, v)
z
Out[398]:
0
In [363]:
z = u + 1
z
Out[363]:
array([1, 2])
In [364]:
z.mean()
Out[364]:
1.5
In [365]:
z.max()
Out[365]:
2
In [366]:
z.min()
Out[366]:
1
In [367]:
z.std()
Out[367]:
0.5
In [372]:
np.linspace(-5, 5, num = 5)
Out[372]:
array([-5. , -2.5,  0. ,  2.5,  5. ])
In [373]:
np.linspace(-5, 5, num = 100)
Out[373]:
array([-5.        , -4.8989899 , -4.7979798 , -4.6969697 , -4.5959596 ,
       -4.49494949, -4.39393939, -4.29292929, -4.19191919, -4.09090909,
       -3.98989899, -3.88888889, -3.78787879, -3.68686869, -3.58585859,
       -3.48484848, -3.38383838, -3.28282828, -3.18181818, -3.08080808,
       -2.97979798, -2.87878788, -2.77777778, -2.67676768, -2.57575758,
       -2.47474747, -2.37373737, -2.27272727, -2.17171717, -2.07070707,
       -1.96969697, -1.86868687, -1.76767677, -1.66666667, -1.56565657,
       -1.46464646, -1.36363636, -1.26262626, -1.16161616, -1.06060606,
       -0.95959596, -0.85858586, -0.75757576, -0.65656566, -0.55555556,
       -0.45454545, -0.35353535, -0.25252525, -0.15151515, -0.05050505,
        0.05050505,  0.15151515,  0.25252525,  0.35353535,  0.45454545,
        0.55555556,  0.65656566,  0.75757576,  0.85858586,  0.95959596,
        1.06060606,  1.16161616,  1.26262626,  1.36363636,  1.46464646,
        1.56565657,  1.66666667,  1.76767677,  1.86868687,  1.96969697,
        2.07070707,  2.17171717,  2.27272727,  2.37373737,  2.47474747,
        2.57575758,  2.67676768,  2.77777778,  2.87878788,  2.97979798,
        3.08080808,  3.18181818,  3.28282828,  3.38383838,  3.48484848,
        3.58585859,  3.68686869,  3.78787879,  3.88888889,  3.98989899,
        4.09090909,  4.19191919,  4.29292929,  4.39393939,  4.49494949,
        4.5959596 ,  4.6969697 ,  4.7979798 ,  4.8989899 ,  5.        ])
In [379]:
np.sin(u)
Out[379]:
array([0.        , 0.84147098])
In [382]:
import matplotlib.pyplot as plt
%matplotlib inline

x = np.linspace(-10, 10, 100)
y = np.sin(x)

plt.gcf().set_size_inches(16, 8)
plt.plot(x, y)
Out[382]:
[<matplotlib.lines.Line2D at 0x1f131e54688>]
In [390]:
# Arrays 2D - 2 Dimensões (Matrizes)

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
In [395]:
n = a.ndim
n
Out[395]:
2
In [399]:
a.shape
Out[399]:
(3, 3)
In [400]:
a.size
Out[400]:
9
In [415]:
a[0, 0:3]
Out[415]:
array([1, 2, 3])
In [416]:
a[2, 2:4]
Out[416]:
array([9])
In [417]:
a[1, 0]
Out[417]:
4
In [425]:
m1 = np.array([[0, 1, 2], [3, 4, 5]])
m2 = np.array([[4, 3, 5], [2, 0, 1]])
mr = m1 + m2
mr
Out[425]:
array([[4, 4, 7],
       [5, 4, 6]])
In [427]:
mr2 = m1 * m2
mr2
Out[427]:
array([[ 0,  3, 10],
       [ 6,  0,  5]])
In [429]:
mr3 = 2 * m1 
mr3
Out[429]:
array([[ 0,  2,  4],
       [ 6,  8, 10]])
In [1]:
# Multiplicação matricial
m1 = np.array([[1, 2, 3], [4, 5, 6]])
m2 = np.array([[1, 2], [3, 4], [5, 6]])

mr = np.dot(m1, m2)
mr
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-a5f33f5ac812> in <module>
      1 # Multiplicação matricial
----> 2 m1 = np.array([[1, 2, 3], [4, 5, 6]])
      3 m2 = np.array([[1, 2], [3, 4], [5, 6]])
      4 
      5 mr = np.dot(m1, m2)

NameError: name 'np' is not defined
In [2]:
int(3.2)
Out[2]:
3
In [3]:
 A='1234567'
In [4]:
A[1::2]
Out[4]:
'246'
In [5]:
Name="Michael Jackson" 
In [6]:
Name.find('el')
Out[6]:
5
In [7]:
A='1' 
In [8]:
B='2'
In [9]:
A+B
Out[9]:
'12'
In [10]:
 '1,2,3,4'.split(',')
Out[10]:
['1', '2', '3', '4']
In [11]:
A=[1,'a']
B=[2,1,'d']
A+B
Out[11]:
[1, 'a', 2, 1, 'd']
In [12]:
V={'A','B'}
V.add('C')
In [13]:
V
Out[13]:
{'A', 'B', 'C'}
In [ ]:
 
In [14]:
V={'A','B','C'}
V.add('C')
V
Out[14]:
{'A', 'B', 'C'}
In [16]:
A=['1','2','3']
for a in A:
    print(2*a)
11
22
33
In [17]:
def Add(x,y):
    z=y+x
    return(y)
In [19]:
Add('1', '1')
Out[19]:
'1'