综合练习

    技术2022-07-11  77

    # Pandas (part 2) comprehensive exercise: zongzi sales data (task 1) and Melbourne temperatures (task 2).
import pandas as pd

ZONGZI_CSV = r'D:\python\python3.6\pysl\Pre_\Pandas(下)综合练习数据集\端午粽子数据.csv'
TEMPERATURE_CSV = r'D:\python\python3.6\pysl\Pre_\Pandas(下)综合练习数据集\墨尔本温度数据.csv'

# Manual fixes for malformed price entries, keyed by row label of the zongzi CSV.
PRICE_CORRECTIONS = {538: 45.9, 4376: 45}

# Price tiers, lowest to highest, used as the labels of the quantile bins.
PRICE_LABELS = ['低', '较低', '中', '较高', '高']


def clean_zongzi(frame):
    """Return a copy with stripped column names and rows lacking 发货地址 removed."""
    renamed = frame.rename(columns=str.strip)
    # Explicit copy: later steps assign into the result, which must not be a
    # view of the caller's frame.
    return renamed[renamed['发货地址'].notna()].copy()


def invalid_prices(frame):
    """Return the raw 价格 entries that cannot be parsed as a number."""
    raw = frame['价格']
    # to_numeric does not depend on the regex default of str.replace, which
    # changed between pandas versions.
    return raw[pd.to_numeric(raw, errors='coerce').isna()]


def apply_price_corrections(frame, corrections=None):
    """Return a copy of *frame* with the given 价格 fixes applied.

    corrections maps row label -> corrected price and defaults to
    PRICE_CORRECTIONS.  Labels missing from the frame are skipped instead of
    raising KeyError, so the function also works on subsets of the data.
    """
    fixes = PRICE_CORRECTIONS if corrections is None else corrections
    result = frame.copy()
    present = {label: value for label, value in fixes.items() if label in result.index}
    if present:
        # Object dtype lets numeric fixes sit next to the raw price strings.
        result['价格'] = result['价格'].astype(object)
        for label, value in present.items():
            result.loc[label, '价格'] = value
    return result


def mean_price(frame, address='浙江 杭州'):
    """1(a): mean price of the items shipped from *address*.

    Raises ValueError when a selected price is not numeric, so malformed
    entries are never silently ignored.
    """
    return frame.loc[frame['发货地址'] == address, '价格'].astype(float).mean()


def title_only_jiaxing(frame, keyword='嘉兴'):
    """1(b): rows whose 标题 mentions *keyword* while 发货地址 does not.

    The keyword is matched literally; a character class such as [嘉兴]{2}
    would also accept 兴嘉, 嘉嘉 and 兴兴.
    """
    in_title = frame['标题'].str.contains(keyword, regex=False, na=False)
    in_address = frame['发货地址'].str.contains(keyword, regex=False, na=False)
    return frame[in_title & ~in_address]


def add_price_category(frame):
    """1(c): insert a 类别 column (price quintile) and sort from high to low.

    Bins are (0, q20], (q20, q40], (q40, q60], (q60, q80], (q80, max].
    Returns a new frame; *frame* itself is left untouched.
    """
    prices = frame['价格'].astype(float)  # convert once, reuse for all bins
    inner_edges = prices.quantile([0.2, 0.4, 0.6, 0.8]).tolist()
    edges = [0] + inner_edges + [prices.max()]
    categories = pd.cut(prices, edges, labels=PRICE_LABELS)
    result = frame.copy()
    result.insert(1, '类别', categories)
    return result.sort_values(by='类别', ascending=False)


def describe_items(frame):
    """1(e): build one descriptive sentence per row from the text columns."""
    return (
        "商品发货地为" + frame['发货地址']
        + ",店铺为" + frame['店铺']
        + ",共计" + frame['付款人数']
        + ",单价为" + frame['价格'].astype('str')
    )


def build_excluded_dates(first_year=1981, last_year=1990):
    """2(a): dates left out of the monthly mean.

    Covers May 1-3 and October 1-7 of every year in the range, plus the
    first business day of every month (freq='BMS').
    """
    # NOTE(review): 'BMS' is the first business day of the month; if the
    # exercise asks for a specific weekday, this frequency needs changing.
    parts = []
    for year in range(first_year, last_year + 1):
        parts.append(pd.date_range(start=f'{year}-05-01', end=f'{year}-05-03'))
        parts.append(pd.date_range(start=f'{year}-10-01', end=f'{year}-10-07'))
    parts.append(
        pd.bdate_range(start=f'{first_year}-01-01', end=f'{last_year}-12-31', freq='BMS')
    )
    return parts[0].append(parts[1:])


def load_temperatures(path):
    """Read the temperature CSV and parse its Date column as datetimes."""
    frame = pd.read_csv(path)
    frame['Date'] = pd.to_datetime(frame['Date'])
    return frame


def monthly_mean_excluding(frame, excluded):
    """2(a): monthly mean of *frame* after dropping rows dated in *excluded*."""
    kept = frame[~frame['Date'].isin(excluded)]
    return kept.set_index('Date').resample('MS').mean()


def seasonal_index(frame):
    """2(b): monthly mean divided by the mean of the same calendar year.

    Every month of every year present in the data is covered.  The yearly
    means are looked up by year for each month, so the division does not
    depend on index alignment between the monthly and yearly tables.
    """
    daily = frame.set_index('Date')
    monthly = daily.resample('MS').mean()
    yearly = daily.groupby(daily.index.year).mean()
    yearly_per_month = yearly.reindex(monthly.index.year).to_numpy()
    return monthly / yearly_per_month


def main():
    """Run every solved part of the exercise and print the answers."""
    zongzi = clean_zongzi(pd.read_csv(ZONGZI_CSV))
    print('unparseable prices:')
    print(invalid_prices(zongzi))
    zongzi = apply_price_corrections(zongzi)

    print('1(a)', mean_price(zongzi))
    print('1(b)')
    print(title_only_jiaxing(zongzi))

    zongzi = add_price_category(zongzi)
    print('1(c)')
    print(zongzi)

    # 1(d) (not solved): only the number of missing 付款人数 values is reported.
    # TODO: estimate and fill them per 类别.
    print('1(d) missing 付款人数:', zongzi['付款人数'].isna().sum())

    print('1(e)')
    print(describe_items(zongzi))

    # 1(f) (not solved).

    temperatures = load_temperatures(TEMPERATURE_CSV)
    print('2(a)')
    print(monthly_mean_excluding(temperatures, build_excluded_dates()))
    print('2(b)')
    print(seasonal_index(temperatures).to_string())


if __name__ == '__main__':
    main()
    Processed: 0.011, SQL: 9