excle中匹配加密手机号(同sheet中)
匹配excle
数据放在excle中同一个sheet中,列名为"手机号", “加密手机号”
import pandas as pd
import re
import osdef match_phones(excel_path):"""匹配原始手机号和加密手机号(从同一个sheet读取)参数:excel_path: Excel文件路径"""try:# 1. 读取Excel文件(只读取第一个sheet)df = pd.read_excel(excel_path)print("✅ Excel文件读取成功")print(f"工作表列名: {df.columns.tolist()}")except Exception as e:print(f"❌ 读取Excel文件失败: {e}")return# 2. 检查列名required_columns = ["手机号", "加密手机号"]if not all(col in df.columns for col in required_columns):print("❌ Excel文件缺少必要的列名")print(f"现有列名: {df.columns.tolist()}")print(f"需要的列名: {required_columns}")return# 3. 提取数据df_original = df[["手机号"]].copy()df_encrypted = df[["加密手机号"]].copy()# 4. 重命名列df_original.columns = ["original_phone"] # 原始手机号df_encrypted.columns = ["encrypted_phone"] # 加密手机号# 5. 清洗数据df_original["original_phone"] = df_original["original_phone"].astype(str).str.strip() # 转为字符串并去除空格df_encrypted["encrypted_phone"] = df_encrypted["encrypted_phone"].astype(str).str.strip() # 转为字符串并去除空格# 6. 提取特征# 原始手机号的前3位和后4位df_original["prefix"] = df_original["original_phone"].str[:3]df_original["suffix"] = df_original["original_phone"].str[-4:]# 加密手机号的前3位和后4位def extract_encrypted_parts(phone):"""提取加密手机号的前3位和后4位"""match = re.match(r"(\d{3})\*{4}(\d{4})", phone)if match:return match.groups()return None, Nonedf_encrypted[["prefix", "suffix"]] = df_encrypted["encrypted_phone"].apply(lambda x: pd.Series(extract_encrypted_parts(x)))# 7. 匹配手机号result = []for idx, row in df_encrypted.iterrows():encrypted = row["encrypted_phone"]prefix = row["prefix"]suffix = row["suffix"]# 检查是否成功提取前缀和后缀if pd.isna(prefix) or pd.isna(suffix):result.append({"原始手机号": df_original.loc[idx, "original_phone"],"加密手机号": encrypted,"匹配结果": "格式错误","匹配数量": 0})continue# 在原始手机号中查找匹配matches = df_original[(df_original["prefix"] == prefix) &(df_original["suffix"] == suffix)]["original_phone"].tolist()# 格式化匹配结果if not matches:matched_str = "无匹配"elif len(matches) == 1:matched_str = matches[0]else:matched_str = ", ".join(matches)result.append({"原始手机号": df_original.loc[idx, "original_phone"],"加密手机号": encrypted,"匹配结果": matched_str,"匹配数量": len(matches)})# 8. 创建结果DataFrameresult_df = pd.DataFrame(result)# 9. 保存结果output_path = os.path.splitext(excel_path)[0] + "_匹配结果.xlsx"result_df.to_excel(output_path, index=False)print(f"✅ 匹配完成! 结果已保存到: {output_path}")return result_dfif __name__ == "__main__":# 替换为你的Excel文件路径excel_file = "cx.xlsx"# 检查文件是否存在if not os.path.exists(excel_file):print(f"❌ 文件不存在: {excel_file}")else:match_phones(excel_file)