You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

319 lines
15 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
███╗ ███╗ █████╗ ████████╗██╗ ██╗███████╗███╗ ███╗ █████╗ ████████╗██╗ ██████╗ █████╗ ██╗
████╗ ████║██╔══██╗╚══██╔══╝██║ ██║██╔════╝████╗ ████║██╔══██╗╚══██╔══╝██║██╔════╝██╔══██╗██║
██╔████╔██║███████║ ██║ ███████║█████╗ ██╔████╔██║███████║ ██║ ██║██║ ███████║██║
██║╚██╔╝██║██╔══██║ ██║ ██╔══██║██╔══╝ ██║╚██╔╝██║██╔══██║ ██║ ██║██║ ██╔══██║██║
██║ ╚═╝ ██║██║ ██║ ██║ ██║ ██║███████╗██║ ╚═╝ ██║██║ ██║ ██║ ██║╚██████╗██║ ██║███████╗
╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚══════╝
███╗ ███╗ ██████╗ ██████╗ ███████╗██╗ ██╗███╗ ██╗ ██████╗
████╗ ████║██╔═══██╗██╔══██╗██╔════╝██║ ██║████╗ ██║██╔════╝
██╔████╔██║██║ ██║██║ ██║█████╗ ██║ ██║██╔██╗ ██║██║ ███╗
██║╚██╔╝██║██║ ██║██║ ██║██╔══╝ ██║ ██║██║╚██╗██║██║ ██║
██║ ╚═╝ ██║╚██████╔╝██████╔╝███████╗███████╗██║██║ ╚████║╚██████╔╝
╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
团队成员: 王岩琪 袁欣萍 郭禹含 龚智勋
"""
import pandas as pd
import numpy as np
# Workbook read when main() is called without arguments (the location the
# script was originally written against).
_DEFAULT_WORKBOOK = "/home/bobmaster/Downloads/数学建模/附件1化学成分及力学性能.xlsx"

# Limits used when pairing chemistry rows with melt numbers.  The original
# script hard-coded both values with the note "0-701 is the range left after
# cleaning".
# NOTE(review): why pairing stops after 321 melt numbers is not visible in
# this file - confirm against the workbook before changing either value.
_CHEM_ROW_LIMIT = 702
_CHEM_MELT_LIMIT = 321

# Labels of the three mechanical properties, in the order of columns 2, 3, 4
# of the mechanics sheet (yield, tensile, elongation).  They are used verbatim
# in plot titles and as heat-map column names.
_PROPERTY_LABELS = ("屈服", "抗拉", "延伸率")


def _load_sheets(workbook_path):
    """Read both sheets and return ``(chemistry, physics)`` after cleaning.

    The first row of the chemistry sheet is skipped and every mechanics row
    containing a missing cell is dropped.
    """
    with pd.ExcelFile(workbook_path) as workbook:
        chemistry_raw = pd.read_excel(workbook, "化学成分", usecols=[0, 2, 3, 4, 5, 6, 7])
        physics_raw = pd.read_excel(workbook, "力学性能")
    return chemistry_raw.iloc[1:, :], physics_raw.dropna(how="any")


def _group_physics(physics):
    """Return ``{melt number: (yield values, tensile values, elongation values)}``.

    Rows are read positionally, so gaps left in the index by ``dropna`` cannot
    raise ``KeyError``, and the number of rows is taken from the data instead
    of a hard-coded count.  Measurements are bucketed by melt number itself;
    for sheets where each melt number occupies one contiguous run of rows this
    yields the same groups as detecting the last row of each run.
    """
    melt_ids = physics.iloc[:, 0].astype("int64").to_numpy()
    columns = [physics.iloc[:, position].to_numpy() for position in (2, 3, 4)]
    grouped = {}
    for melt, *values in zip(melt_ids, *columns):
        buckets = grouped.setdefault(melt, ([], [], []))
        for bucket, value in zip(buckets, values):
            bucket.append(value)
    return grouped


def _summarise_physics(grouped):
    """Return ``{melt number: ((mean, std), (mean, std), (mean, std))}``.

    The three pairs follow the order of ``_PROPERTY_LABELS``.  ``std`` is
    NumPy's default population standard deviation (``ddof=0``).
    """
    stats = {}
    for melt, buckets in grouped.items():
        arrays = [np.array(bucket) for bucket in buckets]
        stats[melt] = tuple((values.mean(), values.std()) for values in arrays)
    return stats


def _average_chemistry(chemistry):
    """Return six dicts ``{melt number: averaged content}`` for E1..E6.

    Consecutive row pairs ``(0, 1), (2, 3), ...`` are averaged and the k-th
    pair is assigned to the k-th distinct melt number.  Pairing is capped by
    ``_CHEM_ROW_LIMIT`` / ``_CHEM_MELT_LIMIT`` and additionally by the amount
    of data actually present, so a shorter sheet no longer raises.
    """
    melt_ids = chemistry.iloc[:, 0].astype("int64").drop_duplicates().reset_index(drop=True)
    elements = [chemistry.iloc[:, position].reset_index(drop=True) for position in range(1, 7)]

    usable_rows = min(_CHEM_ROW_LIMIT, len(chemistry))
    usable_melts = min(_CHEM_MELT_LIMIT, len(melt_ids))
    pair_count = min(usable_rows // 2, usable_melts)

    averages = [{} for _ in elements]
    for pair in range(pair_count):
        melt = melt_ids.iloc[pair]
        first, second = 2 * pair, 2 * pair + 1
        for table, column in zip(averages, elements):
            table[melt] = (column.iloc[first] + column.iloc[second]) / 2
    return averages


def main(workbook_path=_DEFAULT_WORKBOOK):
    """Clean the workbook, plot chemistry vs. mechanics, and fit regressions.

    Steps, in order:

    1. Load and clean the chemistry and mechanics sheets.
    2. Reduce the mechanics data to a mean and standard deviation per melt
       number, and the chemistry data to one averaged value per element
       (E1..E6) per melt number.
    3. Keep only melt numbers present in both tables.
    4. Draw a 2-D histogram for every (element, property mean) combination.
    5. Fit an ``OlsModel`` and an ``MlrModel`` of E1..E6 against each
       property mean and each property standard deviation.
    6. Draw a heat map of the property means together with E1..E6.

    Args:
        workbook_path: Excel workbook to read.  Defaults to the path the
            script was originally written against.

    Returns:
        None.  The function is run for its plots and fitted models.
    """
    # --- Data cleaning and reduction -------------------------------------
    chemistry, physics = _load_sheets(workbook_path)
    physics_stats = _summarise_physics(_group_physics(physics))
    chemistry_avgs = _average_chemistry(chemistry)

    # Keep chemistry and mechanics consistent: only melt numbers found in
    # both tables are used, in chemistry order.
    shared_melts = [melt for melt in chemistry_avgs[0] if melt in physics_stats]
    element_arrays = [
        np.array([table[melt] for melt in shared_melts]) for table in chemistry_avgs
    ]
    mean_lists = [
        [physics_stats[melt][prop][0] for melt in shared_melts] for prop in range(3)
    ]
    std_lists = [
        [physics_stats[melt][prop][1] for melt in shared_melts] for prop in range(3)
    ]
    mean_arrays = [np.array(values) for values in mean_lists]
    std_arrays = [np.array(values) for values in std_lists]

    # --- 2-D histograms: every element against every property mean --------
    from hist2d import create_hist2d

    for number, element in enumerate(element_arrays, start=1):
        # Only the E5 plots are drawn with an explicit figure size.
        extra = {"fig": (10, 10)} if number == 5 else {}
        for label, means in zip(_PROPERTY_LABELS, mean_arrays):
            create_hist2d(
                element,
                means,
                title="化学成分E{}与{}特性的关系".format(number, label),
                xlabel="E{} %".format(number),
                ylabel="{}特性均值".format(label),
                **extra
            )

    # Regression targets in fitting order: the three means, then the three
    # standard deviations.
    targets = (
        ("qufu_mean", mean_arrays[0]),
        ("kangla_mean", mean_arrays[1]),
        ("yanshen_mean", mean_arrays[2]),
        ("qufu_std", std_arrays[0]),
        ("kangla_std", std_arrays[1]),
        ("yanshen_std", std_arrays[2]),
    )

    # --- OLS regression models ---------------------------------------------
    # ols_calcutate_all is imported only for the optional snippet below.
    from linear_regression import OlsModel
    from linear_regression import ols_calcutate_all

    design = np.array(element_arrays).transpose()  # one row per melt, columns E1..E6
    ols_models = {name: OlsModel(design, np.array(values)) for name, values in targets}
    # To print a fit report: print(ols_models["qufu_mean"].results.summary())
    #
    # To predict the mean and standard deviation for a given melt number:
    #     melt = 90624
    #     x = np.array([1] + [table[melt] for table in chemistry_avgs])
    #     ols_calcutate_all(x, ols_models["qufu_mean"], ols_models["qufu_std"],
    #                       ols_models["kangla_mean"], ols_models["kangla_std"],
    #                       ols_models["yanshen_mean"], ols_models["yanshen_std"])
    # Output recorded by the original authors for melt number 90624:
    #     yield mean:       [281.04367017]
    #     tensile mean:     [302.12712467]
    #     elongation mean:  [11.72968023]
    #     yield std:        [4.04484533]
    #     tensile std:      [3.60625011]
    #     elongation std:   [0.68357895]

    # --- MLR (multiple linear regression) models ---------------------------
    # mlr_calcutate_all is imported only for the optional prediction step.
    from linear_regression import MlrModel
    from linear_regression import mlr_calcutate_all

    design = np.array(element_arrays).transpose()
    mlr_models = {name: MlrModel(design, np.array(values)) for name, values in targets}
    # Regression coefficients: mlr_models["qufu_mean"].results.coef_
    # Intercept:               mlr_models["qufu_mean"].results.intercept_
    #
    # NOTE(review): the original commented-out example in this section reused
    # the OLS call and a 7-element input reshaped to (-1, 6); presumably
    # mlr_calcutate_all with the MLR models and a (1, 6) input was intended -
    # confirm against linear_regression before relying on it.
    # Output recorded by the original authors for melt number 90624:
    #     yield mean:       [281.04919773]
    #     tensile mean:     [302.13923671]
    #     elongation mean:  [11.75333675]
    #     yield std:        [4.06391763]
    #     tensile std:      [3.6079243]
    #     elongation std:   [0.68167218]

    # --- Heat map of property means together with E1..E6 -------------------
    from heatmap import create_heatmap

    columns = dict(zip(_PROPERTY_LABELS, mean_lists))
    for number, element in enumerate(element_arrays, start=1):
        columns["E{}".format(number)] = element
    dataset = pd.DataFrame(columns)
    create_heatmap(dataset)

    # Optional line plot (x is one of "E1".."E6", y is one of the property
    # labels in _PROPERTY_LABELS):
    #     from lineplot import create_lineplot
    #     create_lineplot(dataset, x, y)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()