Files
smt-optimizer/dataloader.py
2024-06-05 22:10:21 +08:00

181 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import copy
from base_optimizer.optimizer_common import *
def load_data(filename: str, load_feeder=False, auto_register=True):
filename = 'data/' + filename
part_content, step_content = False, False
part_start_line, step_start_line, part_end_line, step_end_line = -1, -1, -1, -1
line_counter = 0
with open(filename, 'r') as file:
line = file.readline()
while line:
if line == '[Part]\n':
part_content = True
part_start_line = line_counter
elif line == '[Step]\n':
step_content = True
step_start_line = line_counter
elif line == '\n':
if part_content:
part_content = False
part_end_line = line_counter
elif step_content:
step_content = False
step_end_line = line_counter
line_counter += 1
line = file.readline()
if part_content:
part_end_line = line_counter
else:
step_end_line = line_counter
file_data = pd.DataFrame(
pd.read_csv(filepath_or_buffer=filename, skiprows=step_start_line + 1, nrows=step_end_line - step_start_line + 1,
sep='\t', header=None))
if len(file_data.columns) == 22:
data_col = ["machine", "bl", "ref", "x", "y", "z", "r", "part", "desc", "group", "fdr", "nz", "hd", "cs", "cy",
"sk", "ar", "fid", "pop", "pl", "lv", "pr"]
elif len(file_data.columns) <= 17:
data_col = ["ref", "x", "y", "z", "r", "part", "desc", "fdr", "nz", "hd", "cs", "cy", "sk", "bl", "ar",
"pl", "lv"]
elif len(file_data.columns) <= 18:
data_col = ["ref", "x", "y", "z", "r", "part", "desc", "fdr", "nz", "hd", "cs", "cy", "sk", "bl", "ar", "fid",
"pl", "lv"]
else:
data_col = ["ref", "x", "y", "z", "r", "part", "desc", "fdr", "nz", "hd", "cs", "cy", "sk", "bl", "ar", "fid",
"", "pl", "lv"]
file_data.columns = data_col
pcb_data, component_data, feeder_data = defaultdict(pd.DataFrame), defaultdict(pd.DataFrame), defaultdict(
pd.DataFrame)
# line_data = line_data.dropna(axis=1)
step_col = ["ref", "x", "y", "z", "r", "part", "desc", "fdr", "nz", "hd", "cs", "cy", "sk", "bl", "ar", "pl", "lv"]
machine_name = defaultdict(int)
for _, data in file_data.iterrows():
if "machine" in file_data.columns:
if data['machine'] not in machine_name.keys():
machine_name[data['machine']] = len(machine_name)
machine_index = machine_name[data['machine']]
else:
machine_index = 0
pcb_data[machine_index] = pcb_data[machine_index]._append(data[step_col], ignore_index=True)
part_col = ["part", "fdr", "nz", 'fdn']
try:
if part_start_line != -1:
part_data = pd.DataFrame(
pd.read_csv(filepath_or_buffer=filename, sep='\t', header=None, skiprows=part_start_line + 1,
nrows=part_end_line - part_start_line - 1))
part_data.columns = part_col
else:
part_data = pd.DataFrame(columns=part_col)
except:
part_data = pd.DataFrame(columns=part_col)
part_data['points'] = 0
part_col = ["part", "fdr", "nz", 'fdn', 'points']
machine_num = len(pcb_data)
for machine_index in range(machine_num):
component_data[machine_index] = pd.DataFrame(columns=part_col)
component_slot = defaultdict(set)
for idx, data in pcb_data[machine_index].iterrows():
if pos := data.fdr.find('F') != 0:
pcb_data[machine_index].loc[idx, 'fdr'] = data.fdr[pos:pos + 1:1] + data.fdr[pos + 2::]
if pos := data.nz.find('F') != -1:
pcb_data[machine_index].loc[idx, 'nz'] = data.nz[0:pos:1] + data.nz[pos + 1::]
if isinstance(data.hd, str) and (pos := data.hd.find('F') != -1):
pcb_data[machine_index].loc[idx, 'hd'] = int(data.hd[pos + 2::])
part, nozzle = data.part, data.nz.split(' ')[1]
if part not in component_data[machine_index]['part'].values:
if not auto_register:
raise Exception("unregistered component: " + component_data[machine_index]['part'].values)
else:
component_data[machine_index] = pd.concat([component_data[machine_index], pd.DataFrame(
[part, 'SM8', nozzle, 0, 0], index=part_col).T], ignore_index=True)
# warning_info = 'register component ' + part + ' with default feeder type'
# warnings.warn(warning_info, UserWarning)
part_index = component_data[machine_index][component_data[machine_index]['part'] == part].index.tolist()[0]
component_data[machine_index].loc[part_index, 'points'] += 1
if (fdr := data['fdr'].split(' ')[0]) not in component_slot[part]:
component_data[machine_index].loc[part_index, 'fdn'] += 1
component_slot[part].add(fdr)
for idx, data in part_data.iterrows():
if data.part in component_slot.keys():
part_data.loc[idx, 'fdn'] = part_data.loc[idx, 'fdn'] - len(component_slot[data.part])
assert part_data.loc[idx, 'fdn'] >= 0
for idx, data in part_data.iterrows():
for machine_index in range(machine_num):
if data.part not in component_data[machine_index].part.values:
continue
part_index = component_data[machine_index][component_data[machine_index].part == data.part].index.tolist()[
0]
if component_data[machine_index].loc[part_index].nz != data.nz:
warning_info = 'the nozzle type of component ' + data.part + ' is not consistent with the pcb data'
warnings.warn(warning_info, UserWarning)
if data.fdn == 0:
continue
if data.part in component_data[0].part.values:
part_index = component_data[0][component_data[0].part == data.part].index.tolist()[0]
component_data[0].loc[part_index, 'fdn'] += data.fdn
else:
component_data[0] = pd.concat([component_data[0], pd.DataFrame(data).T], ignore_index=True)
for machine_index in range(machine_num):
for idx, data in component_data[machine_index].iterrows():
if data['fdr'][0:3] == 'SME': # 电动供料器和气动供料器参数一致
component_data[machine_index].at[idx, 'fdr'] = data['fdr'][0:2] + data['fdr'][3:]
# pcb_data[machine_index].sort_values(by="x", ascending=False, inplace=True)
# pcb_data[machine_index].reset_index(inplace=True)
# 读取供料器基座数据
feeder_data = defaultdict(pd.DataFrame)
if load_feeder:
for machine_index in range(machine_num):
feeder_data[machine_index] = pd.DataFrame(columns=['slot', 'part', 'arg']) # arg表示是否为预分配不表示分配数目
for _, data in pcb_data[machine_index].iterrows():
slot, part = data['fdr'].split(' ')
if slot[0] != 'F' and slot[0] != 'R':
continue
slot = int(slot[1:]) if slot[0] == 'F' else int(slot[1:]) + max_slot_index // 2
feeder_data[machine_index] = pd.concat([feeder_data[machine_index], pd.DataFrame([slot, part, 1]).T])
feeder_data[machine_index].drop_duplicates(subset='slot', inplace=True, ignore_index=True)
# 随机移除部分已安装的供料器
drop_index = random.sample(list(range(len(feeder_data))), len(feeder_data) // 2)
feeder_data[machine_index].drop(index=drop_index, inplace=True)
feeder_data[machine_index].sort_values(by='slot', ascending=True, inplace=True, ignore_index=True)
return pcb_data, component_data, feeder_data
def merge_data(partial_pcb_data, partial_component_data):
assert len(partial_pcb_data) == len(partial_component_data)
machine_num = len(partial_pcb_data)
pcb_data, component_data = copy.deepcopy(partial_pcb_data[0]), copy.deepcopy(partial_component_data[0])
for machine_index in range(1, machine_num):
pcb_data = pd.concat([pcb_data, partial_pcb_data[machine_index]], ignore_index=True)
for _, data in partial_component_data[machine_index].iterrows():
if data.part in component_data.part.values:
part_index = component_data[component_data.part == data.part].index.tolist()[0]
component_data.loc[part_index, 'points'] += data.points
component_data.loc[part_index, 'fdn'] += data.fdn
else:
component_data = pd.concat([component_data, pd.DataFrame(data).T], ignore_index=True)
component_data = component_data[component_data['points'] != 0].reset_index(drop=True)
return pcb_data, component_data