3. pandas使用1

    技术2022-07-10  150

    DataFrame的结构

    import pandas as pd import numpy as np # 创建一个符合正态分布的10个股票的5天的涨跌幅数据 stock_change=np.random.normal(0,1,(10,5)) stock_change array([[ 1.76548834, -2.1033278 , -0.44765295, 1.6133684 , 0.56440317], [-0.28406163, 0.33190268, 0.15697138, -0.15352828, 0.39168077], [ 0.97804543, 0.70247765, 0.48852276, -0.84483313, -0.4901491 ], [ 0.32663842, -1.18491493, -1.85617695, -0.08334347, -0.88467526], [-0.0606477 , -0.3282171 , 0.13469079, -0.33644424, 0.49229211], [-0.08971546, 0.29502656, 0.58225254, -1.15526343, 0.1121633 ], [ 0.08384949, 0.63896248, 0.03189999, -0.17538923, -0.06095104], [ 0.0892971 , -1.54426915, 0.22247612, -0.31827644, 1.4569718 ], [-1.31604909, -1.09219638, -0.64118638, -0.55174267, 2.54943429], [-0.75827099, 0.19581938, -0.11856612, 0.44903057, -0.07891904]]) # 创建dataframe stock_num=["股票{}".format(i)for i in range(10)] #构造行标 date=pd.date_range(start="20200629", periods=5,freq="B") stock_change=pd.DataFrame(stock_change, index=stock_num,columns=date) stock_change 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:00股票01.765488-2.103328-0.4476531.6133680.564403股票1-0.2840620.3319030.156971-0.1535280.391681股票20.9780450.7024780.488523-0.844833-0.490149股票30.326638-1.184915-1.856177-0.083343-0.884675股票4-0.060648-0.3282170.134691-0.3364440.492292股票5-0.0897150.2950270.582253-1.1552630.112163股票60.0838490.6389620.031900-0.175389-0.060951股票70.089297-1.5442690.222476-0.3182761.456972股票8-1.316049-1.092196-0.641186-0.5517432.549434股票9-0.7582710.195819-0.1185660.449031-0.078919 stock_change.shape (10, 5) stock_change.index Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7', '股票8', '股票9'], dtype='object') stock_change.columns DatetimeIndex(['2020-06-29', '2020-06-30', '2020-07-01', '2020-07-02', '2020-07-03'], dtype='datetime64[ns]', freq='B') stock_change.values array([[ 1.76548834, -2.1033278 , -0.44765295, 1.6133684 , 0.56440317], [-0.28406163, 0.33190268, 0.15697138, -0.15352828, 0.39168077], [ 0.97804543, 0.70247765, 0.48852276, -0.84483313, -0.4901491 ], [ 0.32663842, -1.18491493, -1.85617695, -0.08334347, -0.88467526], [-0.0606477 , -0.3282171 , 0.13469079, -0.33644424, 0.49229211], [-0.08971546, 0.29502656, 0.58225254, -1.15526343, 0.1121633 ], [ 0.08384949, 0.63896248, 0.03189999, -0.17538923, -0.06095104], [ 0.0892971 , -1.54426915, 0.22247612, -0.31827644, 1.4569718 ], [-1.31604909, -1.09219638, -0.64118638, -0.55174267, 2.54943429], [-0.75827099, 0.19581938, -0.11856612, 0.44903057, -0.07891904]]) stock_change.T 股票0股票1股票2股票3股票4股票5股票6股票7股票8股票92020-06-291.765488-0.2840620.9780450.326638-0.060648-0.0897150.0838490.089297-1.316049-0.7582712020-06-30-2.1033280.3319030.702478-1.184915-0.3282170.2950270.638962-1.544269-1.0921960.1958192020-07-01-0.4476530.1569710.488523-1.8561770.1346910.5822530.0319000.222476-0.641186-0.1185662020-07-021.613368-0.153528-0.844833-0.083343-0.336444-1.155263-0.175389-0.318276-0.5517430.4490312020-07-030.5644030.391681-0.490149-0.8846750.4922920.112163-0.0609511.4569722.549434-0.078919 stock_change.head(7) 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:00股票01.765488-2.103328-0.4476531.6133680.564403股票1-0.2840620.3319030.156971-0.1535280.391681股票20.9780450.7024780.488523-0.844833-0.490149股票30.326638-1.184915-1.856177-0.083343-0.884675股票4-0.060648-0.3282170.134691-0.3364440.492292股票5-0.0897150.2950270.582253-1.1552630.112163股票60.0838490.6389620.031900-0.175389-0.060951 # 索引值修改 new_index=["股票00{}".format(i)for i in range(10)] stock_change.index=new_index stock_change 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:00股票0001.765488-2.103328-0.4476531.6133680.564403股票001-0.2840620.3319030.156971-0.1535280.391681股票0020.9780450.7024780.488523-0.844833-0.490149股票0030.326638-1.184915-1.856177-0.083343-0.884675股票004-0.060648-0.3282170.134691-0.3364440.492292股票005-0.0897150.2950270.582253-1.1552630.112163股票0060.0838490.6389620.031900-0.175389-0.060951股票0070.089297-1.5442690.222476-0.3182761.456972股票008-1.316049-1.092196-0.641186-0.5517432.549434股票009-0.7582710.195819-0.1185660.449031-0.078919 stock_change.reset_index(drop=True) 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:0001.765488-2.103328-0.4476531.6133680.5644031-0.2840620.3319030.156971-0.1535280.39168120.9780450.7024780.488523-0.844833-0.49014930.326638-1.184915-1.856177-0.083343-0.8846754-0.060648-0.3282170.134691-0.3364440.4922925-0.0897150.2950270.582253-1.1552630.11216360.0838490.6389620.031900-0.175389-0.06095170.089297-1.5442690.222476-0.3182761.4569728-1.316049-1.092196-0.641186-0.5517432.5494349-0.7582710.195819-0.1185660.449031-0.078919 stock_change 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:00股票0001.765488-2.103328-0.4476531.6133680.564403股票001-0.2840620.3319030.156971-0.1535280.391681股票0020.9780450.7024780.488523-0.844833-0.490149股票0030.326638-1.184915-1.856177-0.083343-0.884675股票004-0.060648-0.3282170.134691-0.3364440.492292股票005-0.0897150.2950270.582253-1.1552630.112163股票0060.0838490.6389620.031900-0.175389-0.060951股票0070.089297-1.5442690.222476-0.3182761.456972股票008-1.316049-1.092196-0.641186-0.5517432.549434股票009-0.7582710.195819-0.1185660.449031-0.078919 stock_change.set_index("2020-6-29 00:00:00") 2020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:002020-6-29 00:00:001.765488-2.103328-0.4476531.6133680.564403-0.2840620.3319030.156971-0.1535280.3916810.9780450.7024780.488523-0.844833-0.4901490.326638-1.184915-1.856177-0.083343-0.884675-0.060648-0.3282170.134691-0.3364440.492292-0.0897150.2950270.582253-1.1552630.1121630.0838490.6389620.031900-0.175389-0.0609510.089297-1.5442690.222476-0.3182761.456972-1.316049-1.092196-0.641186-0.5517432.549434-0.7582710.195819-0.1185660.449031-0.078919 stock_change 2020-06-29 00:00:002020-06-30 00:00:002020-07-01 00:00:002020-07-02 00:00:002020-07-03 00:00:00股票0001.765488-2.103328-0.4476531.6133680.564403股票001-0.2840620.3319030.156971-0.1535280.391681股票0020.9780450.7024780.488523-0.844833-0.490149股票0030.326638-1.184915-1.856177-0.083343-0.884675股票004-0.060648-0.3282170.134691-0.3364440.492292股票005-0.0897150.2950270.582253-1.1552630.112163股票0060.0838490.6389620.031900-0.175389-0.060951股票0070.089297-1.5442690.222476-0.3182761.456972股票008-1.316049-1.092196-0.641186-0.5517432.549434股票009-0.7582710.195819-0.1185660.449031-0.078919 type(stock_change.loc["股票004"]) pandas.core.series.Series stock_change.loc["股票004"] 2020-06-29 -0.060648 2020-06-30 -0.328217 2020-07-01 0.134691 2020-07-02 -0.336444 2020-07-03 0.492292 Freq: B, Name: 股票004, dtype: float64 pd.Series(np.arange(2,20,2)) 0 2 1 4 2 6 3 8 4 10 5 12 6 14 7 16 8 18 dtype: int64 sr=pd.Series({'red':100,"blue":200,"queen":500, 'yellow':1000}) sr blue 200 queen 500 red 100 yellow 1000 dtype: int64 sr.index Index(['blue', 'queen', 'red', 'yellow'], dtype='object') sr.values array([ 200, 500, 100, 1000])

    pandas 操作suoyin

    data=pd.read_csv("./stock_day/stock_day.csv") data = data.drop(["ma5","ma10","ma20","v_ma5","v_ma10","v_ma20"], axis=1) #让数据简单一些 data.head() openhighcloselowvolumeprice_changep_changeturnover2018-02-2723.5325.8824.1623.5395578.030.632.682.392018-02-2622.8023.7823.5322.8060985.110.693.021.532018-02-2322.8823.3722.8222.7152914.010.542.421.322018-02-2222.2522.7622.2822.0236105.010.361.640.902018-02-1421.4921.9921.9221.4823331.040.442.050.58 data["open"]["2018-02-23"] #pandas必须先列后行 22.88 data.loc["2018-02-23"]["open"] #使用loc可以先行后列 22.88 data.loc["2018-02-26","open"] 22.8 data.iloc[1,0] 22.8 data.loc[data.index[0:4],['open','close','high','low']] openclosehighlow2018-02-2723.5324.1625.8823.532018-02-2622.8023.5323.7822.802018-02-2322.8822.8223.3722.712018-02-2222.2522.2822.7622.02 data.iloc[0:4,data.columns.get_indexer(['open','close','high','low'])] openclosehighlow2018-02-2723.5324.1625.8823.532018-02-2622.8023.5323.7822.802018-02-2322.8822.8223.3722.712018-02-2222.2522.2822.7622.02 data.price_change.head() 2018-02-27 0.63 2018-02-26 0.69 2018-02-23 0.54 2018-02-22 0.36 2018-02-14 0.44 Name: price_change, dtype: float64 # data["open"]=8 data.head() openhighcloselowvolumeprice_changep_changeturnover2018-02-2723.5325.8824.1623.5395578.030.632.682.392018-02-2622.8023.7823.5322.8060985.110.693.021.532018-02-2322.8823.3722.8222.7152914.010.542.421.322018-02-2222.2522.7622.2822.0236105.010.361.640.902018-02-1421.4921.9921.9221.4823331.040.442.050.58 data=data.sort_values(by="p_change", ascending=False).head() data.head() openhighcloselowvolumeprice_changep_changeturnover2015-08-2815.4016.4616.4615.00117827.601.5010.034.032015-05-2127.5028.2228.2226.50121190.112.5710.024.152016-12-2218.5020.4220.4218.45150470.831.8610.023.772015-08-0416.2017.3517.3515.8094292.631.5810.023.232016-07-0718.6618.6618.6618.4148756.551.7010.021.67 data=data.sort_values(by=['open','high']) data.head() openhighcloselowvolumeprice_changep_changeturnover2015-08-2815.4016.4616.4615.00117827.601.5010.034.032015-08-0416.2017.3517.3515.8094292.631.5810.023.232016-12-2218.5020.4220.4218.45150470.831.8610.023.772016-07-0718.6618.6618.6618.4148756.551.7010.021.672015-05-2127.5028.2228.2226.50121190.112.5710.024.15 data.sort_index() openhighcloselowvolumeprice_changep_changeturnover2015-05-2127.5028.2228.2226.50121190.112.5710.024.152015-08-0416.2017.3517.3515.8094292.631.5810.023.232015-08-2815.4016.4616.4615.00117827.601.5010.034.032016-07-0718.6618.6618.6618.4148756.551.7010.021.672016-12-2218.5020.4220.4218.45150470.831.8610.023.77 data["p_change"].sort_values(ascending=True) 2015-08-04 10.02 2016-12-22 10.02 2016-07-07 10.02 2015-05-21 10.02 2015-08-28 10.03 Name: p_change, dtype: float64 data["p_change"].sort_index(ascending=True) 2015-05-21 10.02 2015-08-04 10.02 2015-08-28 10.03 2016-07-07 10.02 2016-12-22 10.02 Name: p_change, dtype: float64 data openhighcloselowvolumeprice_changep_changeturnover2015-08-2815.4016.4616.4615.00117827.601.5010.034.032015-08-0416.2017.3517.3515.8094292.631.5810.023.232016-12-2218.5020.4220.4218.45150470.831.8610.023.772016-07-0718.6618.6618.6618.4148756.551.7010.021.672015-05-2127.5028.2228.2226.50121190.112.5710.024.15 data+100 openhighcloselowvolumeprice_changep_changeturnover2015-08-28115.40116.46116.46115.00117927.60101.50110.03104.032015-08-04116.20117.35117.35115.8094392.63101.58110.02103.232016-12-22118.50120.42120.42118.45150570.83101.86110.02103.772016-07-07118.66118.66118.66118.4148856.55101.70110.02101.672015-05-21127.50128.22128.22126.50121290.11102.57110.02104.15 data/100 openhighcloselowvolumeprice_changep_changeturnover2015-08-280.15400.16460.16460.15001178.27600.01500.10030.04032015-08-040.16200.17350.17350.1580942.92630.01580.10020.03232016-12-220.18500.20420.20420.18451504.70830.01860.10020.03772016-07-070.18660.18660.18660.1841487.56550.01700.10020.01672015-05-210.27500.28220.28220.26501211.90110.02570.10020.0415 data.add(100) openhighcloselowvolumeprice_changep_changeturnover2015-08-28115.40116.46116.46115.00117927.60101.50110.03104.032015-08-04116.20117.35117.35115.8094392.63101.58110.02103.232016-12-22118.50120.42120.42118.45150570.83101.86110.02103.772016-07-07118.66118.66118.66118.4148856.55101.70110.02101.672015-05-21127.50128.22128.22126.50121290.11102.57110.02104.15 data["p_change"]>2 2015-08-28 True 2015-08-04 True 2016-12-22 True 2016-07-07 True 2015-05-21 True Name: p_change, dtype: bool data openhighcloselowvolumeprice_changep_changeturnover2015-08-2815.4016.4616.4615.00117827.601.5010.034.032015-08-0416.2017.3517.3515.8094292.631.5810.023.232016-12-2218.5020.4220.4218.45150470.831.8610.023.772016-07-0718.6618.6618.6618.4148756.551.7010.021.672015-05-2127.5028.2228.2226.50121190.112.5710.024.15 data[data["open"]>17] openhighcloselowvolumeprice_changep_changeturnover2016-12-2218.5020.4220.4218.45150470.831.8610.023.772016-07-0718.6618.6618.6618.4148756.551.7010.021.672015-05-2127.5028.2228.2226.50121190.112.5710.024.15 data.query("p_change>10&open>18").head() openhighcloselowvolumeprice_changep_changeturnover2016-12-2218.5020.4220.4218.45150470.831.8610.023.772016-07-0718.6618.6618.6618.4148756.551.7010.021.672015-05-2127.5028.2228.2226.50121190.112.5710.024.15 data["price_change"].isin([1.86,2.57]) 2015-08-28 False 2015-08-04 False 2016-12-22 True 2016-07-07 False 2015-05-21 True Name: price_change, dtype: bool data.describe() openhighcloselowvolumeprice_changep_changeturnovercount5.0000005.0000005.0000005.0000005.0000005.0000005.0000005.000000mean19.25200020.22200020.22200018.832000106507.5440001.84200010.0220003.370000std4.8243674.7129634.7129634.55520337950.2086950.4289760.0044721.014101min15.40000016.46000016.46000015.00000048756.5500001.50000010.0200001.67000025.20000017.35000017.35000015.80000094292.6300001.58000010.0200003.23000050.50000018.66000018.66000018.410000117827.6000001.70000010.0200003.77000075.66000020.42000020.42000018.450000121190.1100001.86000010.0200004.030000max27.50000028.22000028.22000026.500000150470.8300002.57000010.0300004.150000 data.idxmax() open 2015-05-21 high 2015-05-21 close 2015-05-21 low 2015-05-21 volume 2016-12-22 price_change 2015-05-21 p_change 2015-08-28 turnover 2015-05-21 dtype: object data=pd.read_csv("./stock_day/stock_day.csv") data = data.drop(["ma5","ma10","ma20","v_ma5","v_ma10","v_ma20"], axis=1) #让数据简单一些 data.head() openhighcloselowvolumeprice_changep_changeturnover2018-02-2723.5325.8824.1623.5395578.030.632.682.392018-02-2622.8023.7823.5322.8060985.110.693.021.532018-02-2322.8823.3722.8222.7152914.010.542.421.322018-02-2222.2522.7622.2822.0236105.010.361.640.902018-02-1421.4921.9921.9221.4823331.040.442.050.58 data["p_change"].sort_index().cumsum().head() 2015-03-02 2.62 2015-03-03 4.06 2015-03-04 5.63 2015-03-05 7.65 2015-03-06 16.16 Name: p_change, dtype: float64 data["p_change"].sort_index().cumsum().plot() <matplotlib.axes._subplots.AxesSubplot at 0x7f6c86b3c9e8>
    Processed: 0.008, SQL: 9