Dans la suite, nous supposerons que les bibliothèques pandas et NumPy sont respectivement importées sous les alias pd et np :
import pandas as pd
import numpy as np
>>> df = pd.DataFrame()
>>> df
Empty DataFrame
Columns: []
Index: []
>>> df = pd.DataFrame(columns = ['C1', 'C2', 'C3'])
>>> df
Empty DataFrame
Columns: [C1, C2, C3]
Index: []
>>> df = pd.DataFrame(index = ['R1', 'R2', 'R3', 'R4'])
>>> df
Empty DataFrame
Columns: []
Index: [R1, R2, R3, R4]
>>> df = pd.DataFrame(index = ['R1', 'R2', 'R3', 'R4'], columns = ['C1', 'C2', 'C3'])
>>> df
C1 C2 C3
R1 NaN NaN NaN
R2 NaN NaN NaN
R3 NaN NaN NaN
R4 NaN NaN NaN
>>> df = pd.DataFrame([['myValue']*4]*3)
>>> df
0 1 2 3
0 myValue myValue myValue myValue
1 myValue myValue myValue myValue
2 myValue myValue myValue myValue
>>> df = pd.DataFrame ( [ [1,2,3] , [4,5,6] ] )
>>> df
0 1 2
0 1 2 3
1 4 5 6
>>> myList=[ [1,2,3] , [4,5,6] ]
>>> df = pd.DataFrame(myList)
>>> df
0 1 2
0 1 2 3
1 4 5 6
>>> dict={'C1': [1, 4], 'C2': [2, 5], 'C3': [3, 6] }
>>> df = pd.DataFrame(dict)
>>> df
C1 C2 C3
0 1 2 3
1 4 5 6
>>> import numpy as np
>>> ar=np.array([ [1,2,3] , [4,5,6] ])
>>> df = pd.DataFrame(ar)
>>> df
0 1 2
0 1 2 3
1 4 5 6
>>> df = pd.DataFrame(np.random.randn(26, 3))
>>> len(df)
26
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.head()
0 1 2
0 0.015568 -0.800550 0.995735
1 -0.442080 -0.468169 -0.037934
2 -0.056612 0.602909 0.925391
3 -0.026737 -0.995818 -0.971497
4 2.087257 -0.499205 0.523772
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.head(7)
0 1 2
0 0.677265 -0.258972 -0.952009
1 -0.386076 0.045306 0.032096
2 -1.563352 -1.878010 -0.464059
3 0.406500 0.342614 -0.138983
4 -0.122513 1.020732 -0.943917
5 -1.521629 -1.247523 0.505387
6 -0.049314 0.507946 -0.543623
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.tail()
0 1 2
15 0.104621 -1.089801 -1.115315
16 -1.211985 1.725402 -0.045163
17 0.346928 -0.659337 -0.337814
18 -0.495076 -0.183246 -1.468753
19 -2.023775 0.108744 0.299338
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.tail(3)
0 1 2
17 1.240378 1.526510 1.443443
18 -0.374890 1.472440 -0.122570
19 -1.431079 -0.782603 -1.258807
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.sample(3)
0 1 2
2 0.648810 -0.118470 0.674101
17 0.982777 -1.372479 0.532249
15 2.257330 -1.118517 -0.880213
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.shape
(20, 3)
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.describe()
0 1 2
count 20.000000 20.000000 20.000000
mean -0.073553 0.082187 -0.119492
std 1.025429 1.063697 0.999306
min -2.046268 -1.323986 -1.759392
25% -0.755839 -0.697858 -0.962823
50% 0.072743 -0.239909 -0.225349
75% 0.555210 0.745141 0.268392
max 1.932597 1.933956 1.952840
>>> df = pd.DataFrame(np.random.randn(20, 3))
>>> df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 20 entries, 0 to 19
Data columns (total 3 columns):
0 20 non-null float64
1 20 non-null float64
2 20 non-null float64
dtypes: float64(3)
memory usage: 640.0 bytes
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df.ix[2,'B']
7
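ou (attention : l'indexeur .ix est déprécié depuis pandas 0.20 et supprimé depuis pandas 1.0 ; préférer .loc pour les labels et .iloc pour les positions) :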
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df.B.ix[2]
7
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df['C']
0 2
1 5
2 8
3 11
4 14
Name: C, dtype: int64
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df[df.columns[2]]
0 2
1 5
2 8
3 11
4 14
Name: C, dtype: int64
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df.ix[:,2]
0 2
1 5
2 8
3 11
4 14
Name: C, dtype: int64
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , columns=['A','B','C'] )
>>> df.iloc[:,2]
0 2
1 5
2 8
3 11
4 14
Name: C, dtype: int64
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , index = ['r1','r2','r3','r4','r5'])
>>> df.iloc[2]
0 6
1 7
2 8
Name: r3, dtype: int64
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , index = ['r1','r2','r3','r4','r5'])
>>> df.ix[2]
0 6
1 7
2 8
Name: r3, dtype: int64
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , index = ['r1','r2','r3','r4','r5'])
>>> df.loc['r2']
0 3
1 4
2 5
Name: r2, dtype: int64
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)) , index = ['r1','r2','r3','r4','r5'])
>>> df.ix['r2']
0 3
1 4
2 5
Name: r2, dtype: int64
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)), index = ['r1','r2','r3','r4','r5'], columns=['A', 'B', 'C'])
>>> df.iloc[2:4, 1:3]
B C
r3 7 8
r4 10 11
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)), index = ['r1','r2','r3','r4','r5'], columns=['A', 'B', 'C'])
>>> df.ix[2:4, 1:3]
B C
r3 7 8
r4 10 11
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)), index = ['r1','r2','r3','r4','r5'], columns=['A', 'B', 'C'])
>>> df.loc[['r2', 'r3', 'r4'], ['B', 'C']]
B C
r2 4 5
r3 7 8
r4 10 11
ou :
>>> df = pd.DataFrame( np.arange(15).reshape((5, 3)), index = ['r1','r2','r3','r4','r5'], columns=['A', 'B', 'C'])
>>> df.ix[['r2', 'r3', 'r4'], ['B', 'C']]
B C
r2 4 5
r3 7 8
r4 10 11
>>> df = pd.DataFrame(np.random.randn(7, 3), columns=['A','B','C'])
>>> for index in df.index:
... print ("df[" + str(index) + "]['B']=" + str(df['B'][index]))
...
df[0]['B']=0.283954993534
df[1]['B']=-0.133365147072
df[2]['B']=-0.818946302448
df[3]['B']=0.268719821998
df[4]['B']=0.502778137428
df[5]['B']=1.63455639172
df[6]['B']=-0.267843736515
>>> df = pd.DataFrame(np.random.randn(7, 3), columns=['A','B','C'])
>>> for index, row in df.iterrows():
... print ("df[" + str(index) + "]['B']=" + str(row['B']))
...
df[0]['B']=0.527559779712
df[1]['B']=0.430168409951
df[2]['B']=0.451917861905
df[3]['B']=0.439450997295
df[4]['B']=-0.786818697065
df[5]['B']=0.760795411371
df[6]['B']=0.871493527713
>>> df = pd.DataFrame(np.random.randn(7, 3), columns=['A','B','C'])
>>> for row in df.itertuples():
... print ("df[" + str(row.Index) + "]['B']=" + str(row.B))
...
df[0]['B']=-1.768612347340564
df[1]['B']=-0.35989018257067396
df[2]['B']=-0.6307394564694447
df[3]['B']=-0.8745804495249828
df[4]['B']=-0.882188475278571
df[5]['B']=-1.618288671196083
df[6]['B']=1.2159031714168103
>>> df=pd.DataFrame ([[1, 2, 3],[4, 5, 6]] , index = ['R1', 'R2'], columns = ['C1', 'C2', 'C3'])
>>> df.to_csv('filename.csv')
>>>
$ cat filename.csv
,C1,C2,C3
R1,1,2,3
R2,4,5,6
>>> df=pd.DataFrame ([[1, 2, 3],[4, 5, 6]] , index = ['R1', 'R2'], columns = ['C1', 'C2', 'C3'])
>>> df.to_csv('filename.csv', header = False)
>>>
$ cat filename.csv
R1,1,2,3
R2,4,5,6
>>> df=pd.DataFrame ([[1, 2, 3],[4, 5, 6]] , index = ['R1', 'R2'], columns = ['C1', 'C2', 'C3'])
>>> df.to_csv('filename.csv', index = False)
>>>
$ cat filename.csv
C1,C2,C3
1,2,3
4,5,6
>>> df=pd.DataFrame ([[1, 2, 3],[4, 5, 6]] , index = ['R1', 'R2'], columns = ['C1', 'C2', 'C3'])
>>> df.to_csv('filename.tsv', sep="\t")
>>>
$ cat filename.tsv
C1 C2 C3
R1 1 2 3
R2 4 5 6
>>> df=pd.DataFrame ([[1, 2, 3],[4, 5, 6]] , index = ['R1', 'R2'], columns = ['C1', 'C2', 'C3'])
>>> df.to_csv('filename.csv', sep="+")
>>>
$ cat filename.csv
+C1+C2+C3
R1+1+2+3
R2+4+5+6
$ cat filename.csv
C1,C2,C3
1,2,3
4,5,6
>>> df = pd.read_csv('filename.csv')
>>> df
C1 C2 C3
0 1 2 3
1 4 5 6
$ cat filename.csv
1,2,3
4,5,6
>>> df=pd.read_csv('filename.csv', header=None)
>>> df
0 1 2
0 1 2 3
1 4 5 6
$ cat filename.csv
,C1,C2,C3
R0,1,2,3
R1,4,5,6
>>> df=pd.read_csv('filename.csv', index_col = 0)
>>> df
C1 C2 C3
R0 1 2 3
R1 4 5 6
$ cat filename.tsv
C1 C2 C3
1 2 3
4 5 6
>>> df=pd.read_csv('filename.tsv', sep="\t")
>>> df
C1 C2 C3
0 1 2 3
1 4 5 6
$ cat filename.csv
C1+C2+C3
1+2+3
4+5+6
>>> df=pd.read_csv('filename.csv', sep="+")
>>> df
C1 C2 C3
0 1 2 3
1 4 5 6