07 ,df 列操作,行操作 : 增,删,改,查,计算,列过滤

    技术2024-11-10  24

    1 ,列操作,查 : data["Age"]

    代码 : if __name__ == '__main__': # 全列显示 : pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") df_age = data["Age"] print(df_age) ============================================== 0 22.0 1 38.0 2 26.0

    2 ,列操作,计算 :res = df_age * 2

    目的 : 字段 * 2 ( 列中每个值都乘以 2 ) ; 代码 : if __name__ == '__main__': # 全列显示 : pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") df_age = data["Age"] res = df_age * 2 print(df_age) print(res) ============================================== 0 22.0 1 38.0 2 26.0 .... ================== 0 44.0 1 76.0 2 52.0 ...

    3 ,列操作 : 增 data["double_age"] = res

    目的 : 将 double_age 列新增到原数据中 ; 代码 : if __name__ == '__main__': # 全列显示 : pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") df_age = data["Age"] res = df_age * 2 data["double_age"] = res print(data.head(3)) ========================================= Age double_age .... 22.0 44.0 38.0 76.0 26.0 52.0 ....

    4 ,列操作,删除列 : data.drop(["PassengerId"],axis=1)

    代码 : if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") print(data.head(5)) res = data.drop(["PassengerId","Survived"],axis=1) print(res.head(5)) ================================================== PassengerId Survived Pclass ... Fare Cabin Embarked 0 1 0 3 ... 7.2500 NaN S 1 2 1 1 ... 71.2833 C85 C 2 3 1 3 ... 7.9250 NaN S 3 4 1 1 ... 53.1000 C123 S 4 5 0 3 ... 8.0500 NaN S [5 rows x 12 columns] Pclass Name ... Cabin Embarked 0 3 Braund, Mr. Owen Harris ... NaN S 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... ... C85 C 2 3 Heikkinen, Miss. Laina ... NaN S 3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) ... C123 S 4 3 Allen, Mr. William Henry ... NaN S [5 rows x 10 columns]

    5 ,列操作,改列名 :data.rename(…)

    精华代码 : data.rename(columns={"PassengerId":"PassengerIdOMG"},inplace=True) 目的 : 1 ,将 PassengerId 列名修改为 PassengerIdOMG ; 代码 : if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") print(data.head(5)) data.rename(columns={"PassengerId":"PassengerIdOMG"},inplace=True) print(data.head(5)) =========================================== PassengerId Survived Pclass ... Fare Cabin Embarked 0 1 0 3 ... 7.2500 NaN S 1 2 1 1 ... 71.2833 C85 C 2 3 1 3 ... 7.9250 NaN S 3 4 1 1 ... 53.1000 C123 S 4 5 0 3 ... 8.0500 NaN S [5 rows x 12 columns] PassengerIdOMG Survived Pclass ... Fare Cabin Embarked 0 1 0 3 ... 7.2500 NaN S 1 2 1 1 ... 71.2833 C85 C 2 3 1 3 ... 7.9250 NaN S 3 4 1 1 ... 53.1000 C123 S 4 5 0 3 ... 8.0500 NaN S [5 rows x 12 columns]

    6 ,行操作,查 1 行 : data.loc[0]

    代码 : if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") res = data.loc[0] print(data.head(3)) print(res) ======================================================================== PassengerId Survived Pclass ... Fare Cabin Embarked 0 1 0 3 ... 7.2500 NaN S 1 2 1 1 ... 71.2833 C85 C 2 3 1 3 ... 7.9250 NaN S [3 rows x 12 columns] ========================================== PassengerId 1 Survived 0 Pclass 3 Name Braund, Mr. Owen Harris Sex male Age 22 SibSp 1 Parch 0 Ticket A/5 21171 Fare 7.25 Cabin NaN Embarked S Name: 0, dtype: object

    7 ,行操作,计算 : res01 = res * 2

    代码 : 乘 2 if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") res = data.loc[0] res01 = res * 2 print(res) print(res01) ================================ PassengerId 1 Survived 0 Pclass 3 Name Braund, Mr. Owen Harris Sex male Age 22 SibSp 1 Parch 0 Ticket A/5 21171 Fare 7.25 Cabin NaN Embarked S Name: 0, dtype: object ================================================================== PassengerId 2 Survived 0 Pclass 6 Name Braund, Mr. Owen HarrisBraund, Mr. Owen Harris Sex malemale Age 44 SibSp 2 Parch 0 Ticket A/5 21171A/5 21171 Fare 14.5 Cabin NaN Embarked SS Name: 0, dtype: object

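    8 ,行操作,增 : data.append(res01, ignore_index=True) ( 注 : DataFrame.append 自 pandas 1.4 起弃用,2.0 起已移除 ; 新版本请改用 pd.concat([data, res01.to_frame().T], ignore_index=True) )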

    目的 : 1 ,将最后一行 * 2 ; 2 ,再添加回去,成为新的最后一行 ; 代码 : if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") res = data.loc[890] # 将数据 * 2 res01 = res * 2 # 将数据加入到 data 中 data = data.append(res01, ignore_index=True) print(data.tail(3)) ============================================== PassengerId Survived Pclass ... Fare Cabin Embarked 889 890 1 1 ... 30.00 C148 C 890 891 0 3 ... 7.75 NaN Q 891 1782 0 6 ... 15.50 NaN QQ

    9 ,行操作,删 : res02 = res01.drop(2)

    目的 : 利用索引删除指定行 ; 代码 : if __name__ == '__main__': # 全列显示 : # pd.set_option('display.max_columns', None) # 读文件 csv data = pd.read_csv("titanic_train.csv") res = data.loc[890] # 将数据 * 2 res01 = res * 2 # 将数据加入到 data 中 data = data.append(res01, ignore_index=True) res01 = data.tail(3) print(res01) res01.reset_index(inplace=True,drop=True) print(res01) # 删除第三行 ( 索引为 2 的那行 ) res02 = res01.drop(2) print(res02) =================================================== 889 890 1 1 ... 30.00 C148 C 890 891 0 3 ... 7.75 NaN Q 891 1782 0 6 ... 15.50 NaN QQ [3 rows x 12 columns] PassengerId Survived Pclass ... Fare Cabin Embarked 0 890 1 1 ... 30.00 C148 C 1 891 0 3 ... 7.75 NaN Q 2 1782 0 6 ... 15.50 NaN QQ [3 rows x 12 columns] PassengerId Survived Pclass ... Fare Cabin Embarked 0 890 1 1 ... 30.00 C148 C 1 891 0 3 ... 7.75 NaN Q [2 rows x 12 columns]

    10 ,过滤列 ( 按列值条件筛选行 ) : res01[res01["Age"]%2==0]

    目的 : 留下年龄为偶数的数 ; 思想 : True 留下,False 剔除 ; 代码 : if __name__ == '__main__': # 读文件 csv data = pd.read_csv("titanic_train.csv") # 年龄字段 : df_age = data["Age"].to_frame() # 清除空值 res01 = df_age.dropna() # 留下偶数 res02 = res01[res01["Age"]%2==0] print(res02) ============================== Age 0 22.0 1 38.0 2 26.0
    Processed: 0.009, SQL: 9