# import python packages
import pandas as pd
import numpy as np
import matplotlib as plt
%matplotlib inline


# import titanic_train.csv
ti=pd.read_csv("data/titanic_train.csv")
ti.head()


ti.tail()


#data types & missing values  #info is for the description of your data
ti.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


#dtypes is for the type of data yuo have
ti.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object


#Describe shows a quick statistic summary of your data
#this is a good way to examine the numerical columns
ti.describe()


# OR use this method if you want to find individual results
print(ti['Age'].mean())
print(ti['Age'].std())
print(ti['Age'].max())
print(ti['Age'].min())

30.08756302521008
18.86342448744758
350.0
0.35


ti.hist(figsize=(8,8));


# describe for a single column
ti['Fare'].describe()

count    891.000000
mean      32.204208
std       49.693429
min        0.000000
25%        7.910400
50%       14.454200
75%       31.000000
max      512.329200
Name: Fare, dtype: float64


# do this for categorical columns
ti['Embarked'].describe()

count     889
unique      3
top         S
freq      644
Name: Embarked, dtype: object


#pivot in excel
ti.groupby('Embarked').size()

Embarked
C    168
Q     77
S    644
dtype: int64


# how many unique values in the Sex column
ti.groupby('Sex').size()

Sex
female    312
hello       1
hi          1
male      577
dtype: int64


ti.head(2)


# remove an unnecessary column (PassengerId)
ti = ti.drop('PassengerId', axis=1)
ti.head()


# remove multiple columns (Name, Ticket, Cabin)
ti = ti.drop(['Name','Ticket'], axis=1)
ti.head()


# can you remove Cabin column? (this column is not much useful for further analysis - predictive analytics)
ti=ti.drop(['Cabin'], axis=1)
ti.head()


ti.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    object 
 3   Age       714 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
 6   Fare      891 non-null    float64
 7   Embarked  889 non-null    object 
dtypes: float64(2), int64(4), object(2)
memory usage: 55.8+ KB


# how many missing values in each column or variable
ti.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      2
dtype: int64


ti.head()


len(ti)

891


#handling missing value: remove the rows with any missing value
ti_rev = ti.dropna()
ti_rev.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 712 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  712 non-null    int64  
 1   Pclass    712 non-null    int64  
 2   Sex       712 non-null    object 
 3   Age       712 non-null    float64
 4   SibSp     712 non-null    int64  
 5   Parch     712 non-null    int64  
 6   Fare      712 non-null    float64
 7   Embarked  712 non-null    object 
dtypes: float64(2), int64(4), object(2)
memory usage: 50.1+ KB


ti_rev.head(2)


ti_rev.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


ti.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      2
dtype: int64


ti['Embarked'].describe()

count     889
unique      3
top         S
freq      644
Name: Embarked, dtype: object


# replace null value with the most popular value in a categorial columns
ti = ti.fillna({'Embarked': 'S'})
ti.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      0
dtype: int64


ti['Age'].median()

28.0


# replace null value with the median value 
ti = ti.fillna({'Age': 28})
ti.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


#OR  MUCH BETTER APROACH (USE THIS)
# replace null value with the median value 
ti = ti.fillna({'Age': ti['Age'].mean()})
ti.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


len(ti)

891


# what would you do to replace null values with the mean value
ti=ti.fillna({'Age': ti['Age'].mean()})
ti.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


#we have 4 different types of gender info
ti['Sex'].describe()

count      891
unique       4
top       male
freq       577
Name: Sex, dtype: object


#hello and hi should not be considered a type of gender
ti['Sex'].value_counts()

male      577
female    312
hi          1
hello       1
Name: Sex, dtype: int64


# hello in Sex is inaccurate ...replacing inaccurate value with null value
ti = ti.replace('hello', np.nan)
ti.head()


ti = ti.dropna()
ti.head()


# 350 is an inaccurate value for age and should be replaced by 35
# changinge single value with replace
ti = ti.replace({'Age': 350}, {'Age': 35})
ti.head()


# instead of replacing hello with null value and removing the row, 
# can you replace hi with the most popular value (male) in the categorical column?
ti=ti.replace('hi', 'male')
ti.head()


ti['Sex'].value_counts()

male      578
female    312
Name: Sex, dtype: int64


#above cell is the same as pivot
ti.groupby('Sex').size()

Sex
female    312
male      578
dtype: int64


# Now, there should be male and female in the Sex column

# chaing categorical values with map
ti['Sex'] = ti['Sex'].map({'female': 0, 'male': 1})
ti.head()


# when you analyze data, it is better to transform categorical values to numerical values
# can you convert S, C, Q (Embarked) to 1, 2, 3

ti['Embarked']=ti['Embarked'].map({'S': 1, 'C': 2, 'Q':3})
ti.head()


ti.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


# if the data cleaning is done, you may want to export it as csv or excel
ti.to_csv("data/ti_cleaned.csv")
ti.to_excel("data/ti_cleaned.xlsx")


# create new columns
ti['CurrentAge'] = ti['Age'] + 90
ti.head()


# can you create a new column ("discount") based on Fare?
# discount = Fare * 0.25

ti['discount']=ti['Fare'] * 0.25
ti.head()


# create a copy of existing column
ti['DeparturePort'] = ti['Embarked']
ti.head()


# rename columns
ti = ti.rename(columns={'Pclass': 'PassengerClass'})
ti.head()


# Can you rename discount as Disc
ti=ti.rename(columns={'discount':'Disc'})
ti.head()


# create bins (discretize)

ti['Fare_Category'] = pd.qcut(ti['Fare'], 5)
ti.head()


ti['Fare'].describe()

count    890.000000
mean      32.180730
std       49.716425
min        0.000000
25%        7.903100
50%       14.454200
75%       31.000000
max      512.329200
Name: Fare, dtype: float64


ti.groupby('Fare_Category').size()

Fare_Category
(-0.001, 7.854]      179
(7.854, 10.5]        184
(10.5, 21.317]       171
(21.317, 39.688]     181
(39.688, 512.329]    175
dtype: int64


#rename the name of those bins to 1, 2, 3, 4, 5
ti['Fare_Category_Labels'] = pd.qcut(ti['Fare'], 5, labels=['1', '2', '3', '4', '5'])
ti.head()


ti.dtypes

Survived                   int64
PassengerClass             int64
Sex                        int64
Age                      float64
SibSp                      int64
Parch                      int64
Fare                     float64
Embarked                   int64
CurrentAge               float64
Disc                     float64
DeparturePort              int64
Fare_Category           category
Fare_Category_Labels    category
dtype: object


#Survived is categorical data type. What if I want to change it to integer
ti['Survived'] = ti['Survived'].astype(int)
ti.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 890 entries, 0 to 890
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype   
---  ------                --------------  -----   
 0   Survived              890 non-null    int32   
 1   PassengerClass        890 non-null    int64   
 2   Sex                   890 non-null    int64   
 3   Age                   890 non-null    float64 
 4   SibSp                 890 non-null    int64   
 5   Parch                 890 non-null    int64   
 6   Fare                  890 non-null    float64 
 7   Embarked              890 non-null    int64   
 8   CurrentAge            890 non-null    float64 
 9   Disc                  890 non-null    float64 
 10  DeparturePort         890 non-null    int64   
 11  Fare_Category         890 non-null    category
 12  Fare_Category_Labels  890 non-null    category
dtypes: category(2), float64(4), int32(1), int64(6)
memory usage: 82.1 KB


# filtering or selecting columns

#selecting two columns and assigning to a new variable or dataset called "ti_2"
ti_2 = ti[['Survived','Age']]
ti_2.head()


ti_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 890 entries, 0 to 890
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  890 non-null    int32  
 1   Age       890 non-null    float64
dtypes: float64(1), int32(1)
memory usage: 17.4 KB


# can you select two other columns. use a different variable name (ti_3)

ti_3=ti[['CurrentAge', 'Disc']]
ti_3.head()


# Filtering or Selecting rows

#single criterion
print(len(ti[(ti['Fare'] > 200)]))

20


#if you want to see a list of people
ti[(ti['Fare'] > 200)].head(2)


#Multiple conditions have to be combined using & 
len(ti[(ti['Fare'] > 200) & (ti['Survived'] == 1)])

14


#Multiple conditions have to be combined using & 
print(len(ti[(ti['Fare'] < 20)]))
print(len(ti[(ti['Fare'] < 20) & (ti['Survived'] == 1)]))

515
143


(len(ti[(ti['Fare'] < 20) & (ti['Survived'] == 1)])) / (len(ti[(ti['Fare'] < 20)]))

0.27766990291262134


# can you create a filter using a criterion?
# a list of females that survived

ti[(ti['Sex'] == 0) & (ti['Survived'] == 1)].head()


len(ti)

890


# sampling data randomly

# sampling 10 rows only (different samples for every time you run this cell)
ti_10 = ti.sample(10)
ti_10

# to have the same samples for every time 
# ti.sample(10, random_state = 1)


# sort data by column

#sorting: ascending=True is the default
ti_10.sort_values('Fare')


#by= is optional
ti_10.sort_values(by=['Fare'], ascending=False).head()


# Ascending by first column, then descending by second column
ti_10.sort_values(by=['Survived','Age'], ascending=[1,0]).head()


# you can expport this sample data 
ti_10.to_csv("data/ti_10.csv")
ti_10.to_excel("data/ti_10.xlsx")


# can you sample 50 rows?
ti_50 = ti.sample(50)
ti_50


# apply sorting (descending, ascending)
ti_50.sort_values(by=['Survived','CurrentAge'], ascending=[0,1]).head()

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.00	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	350.00	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	hi	26.00	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	hello	35.00	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	0.35	0	373450	8.0500	NaN	S

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
886	887	0	2	Montvila, Rev. Juozas	male	27.0	0	0	211536	13.00	NaN	S
887	888	1	1	Graham, Miss. Margaret Edith	female	19.0	0	0	112053	30.00	B42	S
888	889	0	3	Johnston, Miss. Catherine Helen "Carrie"	female	NaN	1	2	W./C. 6607	23.45	NaN	S
889	890	1	1	Behr, Mr. Karl Howell	male	26.0	0	0	111369	30.00	C148	C
890	891	0	3	Dooley, Mr. Patrick	male	32.0	0	0	370376	7.75	NaN	Q

	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	0	3	Braund, Mr. Owen Harris	male	22.00	1	A/5 21171	7.2500	NaN	S
1	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	350.00	1	PC 17599	71.2833	C85	C
2	1	3	Heikkinen, Miss. Laina	hi	26.00	0	STON/O2. 3101282	7.9250	NaN	S
3	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	hello	35.00	1	113803	53.1000	C123	S
4	0	3	Allen, Mr. William Henry	male	0.35	0	373450	8.0500	NaN	S

	Survived	Pclass	Sex	Age	SibSp	Fare	Embarked	CurrentAge
0	0	3	1	22.00	1	7.2500	1	112.00
1	1	1	0	35.00	1	71.2833	2	125.00
2	1	3	1	26.00	0	7.9250	1	116.00
4	0	3	1	0.35	0	8.0500	1	90.35
5	0	3	1	28.00	0	8.4583	3	118.00

	Survived	Pclass	Sex	Age	SibSp	Fare	Embarked	CurrentAge	discount
0	0	3	1	22.00	1	7.2500	1	112.00	1.812500
1	1	1	0	35.00	1	71.2833	2	125.00	17.820825
2	1	3	1	26.00	0	7.9250	1	116.00	1.981250
4	0	3	1	0.35	0	8.0500	1	90.35	2.012500
5	0	3	1	28.00	0	8.4583	3	118.00	2.114575

Patrick Mugisha, "On my honor, as a student, I have neither given nor received unauthorized aid on this academic work."¶

Table of Contents

ETL (Extract Transform Loading): What? Why?¶

Titanic Dataset¶

Essential Data Transformation¶

Finding out data type & missing values¶

Basic statistics¶

Remove (unnessary) columns¶

Handling missing value¶

Handling Inaccurate/Inconsistent Data¶

Intermediate-level Data Transformation¶

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
count	891.000000	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	446.000000	0.383838	2.308642	30.087563	0.523008	0.381594	32.204208
std	257.353842	0.486592	0.836071	18.863424	1.102743	0.806057	49.693429
min	1.000000	0.000000	1.000000	0.350000	0.000000	0.000000	0.000000
25%	223.500000	0.000000	2.000000	20.000000	0.000000	0.000000	7.910400
50%	446.000000	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	668.500000	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	891.000000	1.000000	3.000000	350.000000	8.000000	6.000000	512.329200

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	0	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	350.0	1	0	PC 17599	71.2833	C85	C

	Survived	PassengerClass	Sex	Age	SibSp	Parch	Fare	Embarked	CurrentAge	Disc	DeparturePort	Fare_Category	Fare_Category_Labels
27	0	1	1	19.0	3	2	263.0	1	109.0	65.75	1	(39.688, 512.329]	5
88	1	1	0	23.0	3	2	263.0	1	113.0	65.75	1	(39.688, 512.329]	5

	Survived	PassengerClass	Age	SibSp	Parch	Fare	Embarked	CurrentAge	Disc	DeparturePort	Fare_Category	Fare_Category_Labels
1	1	1	35.0	1	0	71.2833	2	125.0	17.820825	2	(39.688, 512.329]	5
8	1	3	27.0	0	2	11.1333	1	117.0	2.783325	1	(10.5, 21.317]	3
9	1	2	14.0	1	0	30.0708	2	104.0	7.517700	2	(21.317, 39.688]	4
10	1	3	4.0	1	1	16.7000	1	94.0	4.175000	1	(10.5, 21.317]	3
11	1	1	58.0	0	0	26.5500	1	148.0	6.637500	1	(21.317, 39.688]	4

	Survived	PassengerClass	Sex	Age	SibSp	Parch	Fare	Embarked	CurrentAge	Disc	DeparturePort	Fare_Category	Fare_Category_Labels
259	1	2	0	50.0	0	1	26.0000	1	140.0	6.500000	1	(21.317, 39.688]	4
578	0	3	0	28.0	1	0	14.4583	2	118.0	3.614575	2	(10.5, 21.317]	3
223	0	3	1	28.0	0	0	7.8958	1	118.0	1.973950	1	(7.854, 10.5]	2
598	0	3	1	28.0	0	0	7.2250	2	118.0	1.806250	2	(-0.001, 7.854]	1
356	1	1	0	22.0	0	1	55.0000	1	112.0	13.750000	1	(39.688, 512.329]	5
770	0	3	1	24.0	0	0	9.5000	1	114.0	2.375000	1	(7.854, 10.5]	2
270	0	1	1	28.0	0	0	31.0000	1	118.0	7.750000	1	(21.317, 39.688]	4
112	0	3	1	22.0	0	0	8.0500	1	112.0	2.012500	1	(7.854, 10.5]	2
807	0	3	0	18.0	0	0	7.7750	1	108.0	1.943750	1	(-0.001, 7.854]	1
682	0	3	1	20.0	0	0	9.2250	1	110.0	2.306250	1	(7.854, 10.5]	2

	Survived	PassengerClass	Sex	Age	SibSp	Parch	Fare	Embarked	CurrentAge	Disc	DeparturePort	Fare_Category	Fare_Category_Labels
510	1	3	1	29.0	0	0	7.7500	3	119.0	1.937500	3	(-0.001, 7.854]	1
869	1	3	1	4.0	1	1	11.1333	1	94.0	2.783325	1	(10.5, 21.317]	3
150	0	2	1	51.0	0	0	12.5250	1	141.0	3.131250	1	(10.5, 21.317]	3
136	1	1	0	19.0	0	2	26.2833	1	109.0	6.570825	1	(21.317, 39.688]	4
836	0	3	1	21.0	0	0	8.6625	1	111.0	2.165625	1	(7.854, 10.5]	2
16	0	3	1	2.0	4	1	29.1250	3	92.0	7.281250	3	(21.317, 39.688]	4
33	0	2	1	66.0	0	0	10.5000	1	156.0	2.625000	1	(7.854, 10.5]	2
22	1	3	0	15.0	0	0	8.0292	3	105.0	2.007300	3	(7.854, 10.5]	2
576	1	2	0	34.0	0	0	13.0000	1	124.0	3.250000	1	(10.5, 21.317]	3
212	0	3	1	22.0	0	0	7.2500	1	112.0	1.812500	1	(-0.001, 7.854]	1
28	1	3	0	28.0	0	0	7.8792	3	118.0	1.969800	3	(7.854, 10.5]	2
399	1	2	0	28.0	0	0	12.6500	1	118.0	3.162500	1	(10.5, 21.317]	3
574	0	3	1	16.0	0	0	8.0500	1	106.0	2.012500	1	(7.854, 10.5]	2
453	1	1	1	49.0	1	0	89.1042	2	139.0	22.276050	2	(39.688, 512.329]	5
889	1	1	1	26.0	0	0	30.0000	2	116.0	7.500000	2	(21.317, 39.688]	4
274	1	3	0	28.0	0	0	7.7500	3	118.0	1.937500	3	(-0.001, 7.854]	1
199	0	2	0	24.0	0	0	13.0000	1	114.0	3.250000	1	(10.5, 21.317]	3
206	0	3	1	32.0	1	0	15.8500	1	122.0	3.962500	1	(10.5, 21.317]	3
533	1	3	0	28.0	0	2	22.3583	2	118.0	5.589575	2	(21.317, 39.688]	4
722	0	2	1	34.0	0	0	13.0000	1	124.0	3.250000	1	(10.5, 21.317]	3
324	0	3	1	28.0	8	2	69.5500	1	118.0	17.387500	1	(39.688, 512.329]	5
109	1	3	0	28.0	1	0	24.1500	3	118.0	6.037500	3	(21.317, 39.688]	4
90	0	3	1	29.0	0	0	8.0500	1	119.0	2.012500	1	(7.854, 10.5]	2
445	1	1	1	4.0	0	2	81.8583	1	94.0	20.464575	1	(39.688, 512.329]	5
708	1	1	0	22.0	0	0	151.5500	1	112.0	37.887500	1	(39.688, 512.329]	5
393	1	1	0	23.0	1	0	113.2750	2	113.0	28.318750	2	(39.688, 512.329]	5
741	0	1	1	36.0	1	0	78.8500	1	126.0	19.712500	1	(39.688, 512.329]	5
712	1	1	1	48.0	1	0	52.0000	1	138.0	13.000000	1	(39.688, 512.329]	5
736	0	3	0	48.0	1	3	34.3750	1	138.0	8.593750	1	(21.317, 39.688]	4
668	0	3	1	43.0	0	0	8.0500	1	133.0	2.012500	1	(7.854, 10.5]	2
634	0	3	0	9.0	3	2	27.9000	1	99.0	6.975000	1	(21.317, 39.688]	4
17	1	2	1	28.0	0	0	13.0000	1	118.0	3.250000	1	(10.5, 21.317]	3
41	0	2	0	27.0	1	0	21.0000	1	117.0	5.250000	1	(10.5, 21.317]	3
356	1	1	0	22.0	0	1	55.0000	1	112.0	13.750000	1	(39.688, 512.329]	5
104	0	3	1	37.0	2	0	7.9250	1	127.0	1.981250	1	(7.854, 10.5]	2
87	0	3	1	28.0	0	0	8.0500	1	118.0	2.012500	1	(7.854, 10.5]	2
13	0	3	1	39.0	1	5	31.2750	1	129.0	7.818750	1	(21.317, 39.688]	4
621	1	1	1	42.0	1	0	52.5542	1	132.0	13.138550	1	(39.688, 512.329]	5
481	0	2	1	28.0	0	0	0.0000	1	118.0	0.000000	1	(-0.001, 7.854]	1
695	0	2	1	52.0	0	0	13.5000	1	142.0	3.375000	1	(10.5, 21.317]	3
619	0	2	1	26.0	0	0	10.5000	1	116.0	2.625000	1	(7.854, 10.5]	2
213	0	2	1	30.0	0	0	13.0000	1	120.0	3.250000	1	(10.5, 21.317]	3
26	0	3	1	28.0	0	0	7.2250	2	118.0	1.806250	2	(-0.001, 7.854]	1
441	0	3	1	20.0	0	0	9.5000	1	110.0	2.375000	1	(7.854, 10.5]	2
550	1	1	1	17.0	0	2	110.8833	2	107.0	27.720825	2	(39.688, 512.329]	5
624	0	3	1	21.0	0	0	16.1000	1	111.0	4.025000	1	(10.5, 21.317]	3
349	0	3	1	42.0	0	0	8.6625	1	132.0	2.165625	1	(7.854, 10.5]	2
617	0	3	0	26.0	1	0	16.1000	1	116.0	4.025000	1	(10.5, 21.317]	3
482	0	3	1	50.0	0	0	8.0500	1	140.0	2.012500	1	(7.854, 10.5]	2
795	0	2	1	39.0	0	0	13.0000	1	129.0	3.250000	1	(10.5, 21.317]	3