基于运营商投诉工单的分析系统设计与实现
基于运营商投诉工单的分析系统设计与实现
1. 系统概述
1.1 项目背景
随着电信行业的快速发展,运营商每天需要处理大量客户投诉工单。这些工单包含了宝贵的客户反馈信息,通过系统化分析可以发现问题趋势、改进服务质量、提升客户满意度。传统的手工处理方式效率低下,难以从海量数据中提取有价值的信息。
1.2 系统目标
本系统旨在开发一个基于Python的运营商投诉工单分析系统,实现以下目标:
- 自动化处理和分析投诉工单数据
- 生成多维度分析报告和可视化图表
- 自动发送邮件提醒至指定人员
- 提供友好的用户界面,方便非技术人员使用
- 支持定期自动执行分析任务
1.3 系统架构
系统采用模块化设计,主要包含以下组件:
- 数据输入模块:支持多种格式数据导入
- 数据处理模块:数据清洗、转换和预处理
- 分析引擎:多维度数据分析算法
- 可视化模块:图表生成和报告制作
- 邮件通知模块:自动发送分析结果
- 用户界面:图形化操作界面
- 任务调度模块:定期执行分析任务
2. 系统设计与实现
2.1 技术选型
- 编程语言:Python 3.8+
- 数据处理:Pandas, NumPy
- 数据分析:Scikit-learn, Statsmodels
- 数据可视化:Matplotlib, Seaborn, Plotly
- 邮件发送:smtplib, email
- 用户界面:PyQt5
- 任务调度:APScheduler
- 数据存储:SQLite, CSV/Excel文件
- 报告生成:Jinja2, ReportLab
2.2 系统模块设计
2.2.1 数据输入模块
import pandas as pd
import os
from datetime import datetime


class DataInput:
    """Load complaint work-order files (CSV / Excel) into pandas DataFrames."""

    # Encodings tried in order for CSV files. 'utf-8-sig' reads plain UTF-8 and
    # also strips the BOM written by Excel (otherwise the first column name is
    # prefixed with '\ufeff' and no longer matches the configured column name);
    # 'gb18030' is a superset of GBK/GB2312, common in Chinese exports.
    CSV_ENCODINGS = ('utf-8-sig', 'gb18030')

    def __init__(self):
        # File extensions accepted by load_data().
        self.supported_formats = ['.csv', '.xlsx', '.xls']

    def load_data(self, file_path):
        """Load a data file, choosing the reader from the file extension.

        Args:
            file_path: Path to a .csv, .xlsx or .xls file.

        Returns:
            The loaded pandas DataFrame.

        Raises:
            Exception: On any failure (unsupported extension, unreadable file,
                logging error). The original error is attached as __cause__.
        """
        try:
            ext = os.path.splitext(file_path)[1].lower()
            if ext not in self.supported_formats:
                raise ValueError(f"不支持的文件格式: {ext}")
            if ext == '.csv':
                df = self._read_csv(file_path)
            else:
                df = pd.read_excel(file_path)
            # Record what was loaded.
            self.log_data_load(df.shape, file_path)
            return df
        except Exception as e:
            # Keep the broad exception type callers already catch, but chain
            # the cause so the real traceback is not lost.
            raise Exception(f"数据加载失败: {str(e)}") from e

    def _read_csv(self, file_path):
        """Read a CSV file, falling back through CSV_ENCODINGS on decode errors."""
        last_error = None
        for encoding in self.CSV_ENCODINGS:
            try:
                return pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError as e:
                last_error = e
        raise last_error

    def log_data_load(self, shape, file_path):
        """Append a timestamped line describing the load to system_log.txt."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_msg = f"{timestamp} - 加载文件: {file_path}, 数据形状: {shape}"
        with open("system_log.txt", "a", encoding="utf-8") as f:
            f.write(log_msg + "\n")
2.2.2 数据处理模块
import re
from datetime import datetime, timedelta
import jieba
import jieba.analyse


class DataProcessor:
    """Clean complaint data and derive keywords and complaint categories."""

    # Category assigned when no keyword matches or the text is missing.
    DEFAULT_CATEGORY = '其他问题'

    # Category -> keywords; the first category (in this order) with a keyword
    # contained in the text wins.
    CATEGORY_KEYWORDS = {
        '网络质量': ['信号', '断网', '网速', '延迟', '掉线', '覆盖', '基站'],
        '费用问题': ['收费', '套餐', '账单', '扣费', '价格', '资费', '退款'],
        '服务态度': ['态度', '服务', '客服', '不耐烦', '敷衍', '推诿'],
        '业务办理': ['办理', '开通', '取消', '变更', '签约', '解约'],
    }

    def __init__(self):
        self.stopwords = self.load_stopwords()

    def load_stopwords(self):
        """Load stop words from stopwords.txt (one per line).

        Returns:
            A set of stop words; empty (with a printed warning) when the file
            does not exist.
        """
        stopwords = set()
        try:
            with open("stopwords.txt", "r", encoding="utf-8") as f:
                for line in f:
                    word = line.strip()
                    if word:  # do not register blank lines as a stop word
                        stopwords.add(word)
        except FileNotFoundError:
            print("警告: 未找到停用词文件")
        return stopwords

    def clean_data(self, df):
        """Drop empty/duplicate rows and fill missing values.

        Text columns are filled with '未知' and numeric columns with 0. Other
        dtypes (e.g. datetimes) are left untouched, because filling them with
        0 would corrupt the column.

        Returns:
            A new, cleaned DataFrame; the input is not modified.
        """
        # .copy() so the column assignments below never write into a view.
        df = df.dropna(how='all').drop_duplicates().copy()
        for col in df.columns:
            column = df[col]
            if pd.api.types.is_numeric_dtype(column):
                df[col] = column.fillna(0)
            elif (pd.api.types.is_object_dtype(column)
                  or pd.api.types.is_string_dtype(column)):
                df[col] = column.fillna('未知')
        return df

    def standardize_dates(self, df, date_columns):
        """Convert the given columns to datetime; unparseable values become NaT.

        Columns missing from the frame are skipped.
        """
        for col in date_columns:
            if col not in df.columns:
                continue
            try:
                df[col] = pd.to_datetime(df[col], errors='coerce')
            except (ValueError, TypeError, OverflowError):
                print(f"警告: 无法转换列 {col} 为日期格式")
        return df

    def extract_keywords(self, text, topK=10):
        """Return up to topK jieba keywords of text, excluding stop words."""
        if pd.isna(text) or text == '':
            return []
        keywords = jieba.analyse.extract_tags(str(text), topK=topK)
        return [kw for kw in keywords if kw not in self.stopwords]

    def categorize_complaints(self, df, text_column):
        """Add a '投诉类别' column derived from keyword matches in text_column.

        The frame is modified in place and also returned.
        """
        categories = self.CATEGORY_KEYWORDS
        default = self.DEFAULT_CATEGORY

        def assign_category(text):
            if pd.isna(text):
                return default
            text = str(text)
            for category, keywords in categories.items():
                if any(keyword in text for keyword in keywords):
                    return category
            return default

        df['投诉类别'] = df[text_column].apply(assign_category)
        return df
2.2.3 分析引擎模块
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import statsmodels.api as sm


class AnalysisEngine:
    """Time, category, sentiment, trend and clustering analyses of complaints."""

    POSITIVE_WORDS = ['好', '满意', '感谢', '快', '顺利', '解决', '专业', '耐心']
    NEGATIVE_WORDS = ['差', '慢', '不满意', '投诉', '问题', '故障', '无法', '不能', '失望']

    def __init__(self):
        # Chinese text has no spaces, so the default word-boundary tokenizer
        # would treat whole clauses as single tokens; segment with jieba.
        self.vectorizer = TfidfVectorizer(
            max_features=1000, tokenizer=jieba.lcut, token_pattern=None)

    def time_analysis(self, df, date_column):
        """Count complaints per hour, weekday and month.

        Adds the helper columns '小时', '星期' and '月份' to df.

        Returns:
            dict with keys 'hourly', 'weekday' and 'monthly', each mapping a
            bucket to its complaint count.
        """
        dates = df[date_column].dt
        df['小时'] = dates.hour
        df['星期'] = dates.day_name()
        df['月份'] = dates.month
        return {
            'hourly': df['小时'].value_counts().sort_index().to_dict(),
            'weekday': df['星期'].value_counts().to_dict(),
            'monthly': df['月份'].value_counts().sort_index().to_dict(),
        }

    def category_analysis(self, df, category_column):
        """Return counts and percentages (2 decimals) per complaint category."""
        category_counts = df[category_column].value_counts()
        category_percent = (category_counts / len(df) * 100).round(2)
        return {
            'counts': category_counts.to_dict(),
            'percentage': category_percent.to_dict(),
        }

    def sentiment_analysis(self, df, text_column):
        """Label each text as '正面' / '负面' / '中性' using word lists.

        Adds the column '情感倾向' to df.

        Returns:
            dict with 'counts' and 'percentage' per sentiment label.
        """
        positive_words = self.POSITIVE_WORDS
        negative_words = self.NEGATIVE_WORDS

        def analyze_sentiment(text):
            if pd.isna(text):
                return '中性'
            text = str(text)
            neg_count = sum(1 for word in negative_words if word in text)
            # Remove negative phrases before looking for positive words, so
            # that e.g. '不满意' is not also counted as the positive '满意'.
            remainder = text
            for word in negative_words:
                remainder = remainder.replace(word, ' ')
            pos_count = sum(1 for word in positive_words if word in remainder)
            if pos_count > neg_count:
                return '正面'
            if neg_count > pos_count:
                return '负面'
            return '中性'

        df['情感倾向'] = df[text_column].apply(analyze_sentiment)
        sentiment_counts = df['情感倾向'].value_counts()
        return {
            'counts': sentiment_counts.to_dict(),
            'percentage': (sentiment_counts / len(df) * 100).round(2).to_dict(),
        }

    def trend_analysis(self, df, date_column):
        """Return weekly complaint counts and their 4-week moving average.

        The caller's frame is left untouched (no in-place re-indexing); rows
        without a valid date are ignored.

        Returns:
            dict with 'weekly_trend' and 'moving_avg', both keyed by the
            week-ending timestamp. The first three moving-average values are
            NaN because the window is not yet full.
        """
        indexed = df.dropna(subset=[date_column]).set_index(date_column)
        weekly_trend = indexed.resample('W').size()
        moving_avg = weekly_trend.rolling(window=4).mean()
        return {
            'weekly_trend': weekly_trend.to_dict(),
            'moving_avg': moving_avg.to_dict(),
        }

    def text_clustering(self, df, text_column, n_clusters=5):
        """Cluster complaint texts with TF-IDF + K-means.

        Adds the column '聚类结果' to df (unless there is no text at all).

        Returns:
            dict with 'cluster_distribution' (list of (cluster_id, size),
            largest first) and 'cluster_keywords' ('聚类N' -> top keywords).
        """
        texts = df[text_column].fillna('').astype(str).tolist()
        if not any(text.strip() for text in texts):
            # Nothing to vectorize; fit_transform would raise on an empty vocabulary.
            return {'cluster_distribution': [], 'cluster_keywords': {}}

        X = self.vectorizer.fit_transform(texts)
        # K-means cannot form more clusters than there are samples.
        n_clusters = max(1, min(n_clusters, X.shape[0]))
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        clusters = kmeans.fit_predict(X)
        df['聚类结果'] = clusters

        cluster_keywords = {}
        for i in range(n_clusters):
            all_text = ' '.join(
                text for text, label in zip(texts, clusters) if label == i)
            cluster_keywords[f'聚类{i+1}'] = jieba.analyse.extract_tags(all_text, topK=10)

        # Plain ints (not numpy scalars) keep the result easy to serialize.
        distribution = Counter(int(label) for label in clusters).most_common()
        return {
            'cluster_distribution': distribution,
            'cluster_keywords': cluster_keywords,
        }
2.2.4 可视化模块
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.font_manager import FontProperties
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import os

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font


class Visualization:
    """Render analysis results as PNG charts and an interactive HTML dashboard."""

    WEEKDAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']

    def __init__(self, output_dir="reports"):
        self.output_dir = output_dir
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

    def _save_figure(self, fig, filename):
        """Save fig into the output directory, close it and return the path."""
        path = os.path.join(self.output_dir, filename)
        fig.savefig(path, dpi=300, bbox_inches='tight')
        plt.close(fig)
        return path

    def create_time_series_plot(self, time_stats, filename):
        """Draw hourly / weekday / monthly bar charts; return the image path.

        time_stats must contain the keys 'hourly', 'weekday' and 'monthly'.
        """
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('投诉工单时间分布分析', fontsize=16)

        hourly = time_stats['hourly']
        axes[0, 0].bar(list(hourly.keys()), list(hourly.values()))
        axes[0, 0].set_title('按小时分布')
        axes[0, 0].set_xlabel('小时')
        axes[0, 0].set_ylabel('投诉数量')

        weekday_values = [time_stats['weekday'].get(day, 0)
                          for day in self.WEEKDAY_ORDER]
        axes[0, 1].bar(self.WEEKDAY_ORDER, weekday_values)
        axes[0, 1].set_title('按星期分布')
        axes[0, 1].tick_params(axis='x', rotation=45)

        monthly = time_stats['monthly']
        axes[1, 0].bar(list(monthly.keys()), list(monthly.values()))
        axes[1, 0].set_title('按月分布')
        axes[1, 0].set_xlabel('月份')
        axes[1, 0].set_ylabel('投诉数量')

        # Only three charts are drawn; hide the unused fourth panel.
        axes[1, 1].axis('off')

        fig.tight_layout()
        return self._save_figure(fig, filename)

    def create_category_pie_chart(self, category_stats, filename):
        """Draw the category share pie chart; return the image path."""
        percentages = category_stats['percentage']
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.pie(list(percentages.values()), labels=list(percentages.keys()),
               autopct='%1.1f%%', startangle=90)
        ax.axis('equal')
        ax.set_title('投诉类别分布')
        return self._save_figure(fig, filename)

    def create_trend_plot(self, trend_data, filename):
        """Plot weekly counts with the 4-week moving average; return the path."""
        weekly = trend_data['weekly_trend']
        dates = [pd.to_datetime(str(date)) for date in weekly.keys()]
        values = list(weekly.values())
        moving_avg = list(trend_data['moving_avg'].values())

        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(dates, values, label='每周投诉量', alpha=0.7)
        ax.plot(dates, moving_avg, label='4周移动平均', linewidth=2)
        ax.set_title('投诉趋势分析')
        ax.set_xlabel('日期')
        ax.set_ylabel('投诉数量')
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)
        fig.tight_layout()
        return self._save_figure(fig, filename)

    def create_interactive_dashboard(self, df, analysis_results, filename):
        """Build a 2x2 Plotly dashboard and write it as HTML.

        Each panel is drawn only when its section is present in
        analysis_results, so a data file lacking the date or text column
        still yields a (partial) dashboard instead of a KeyError.

        Args:
            df: The analysed DataFrame (currently unused; kept for callers).
            analysis_results: dict optionally containing 'category_analysis',
                'trend_analysis', 'sentiment_analysis', 'clustering_analysis'.
            filename: Output file name inside the output directory.

        Returns:
            Path of the written HTML file.
        """
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('投诉类别分布', '时间趋势', '情感分析', '热点问题聚类'),
            specs=[[{"type": "pie"}, {"type": "xy"}],
                   [{"type": "bar"}, {"type": "scatter"}]])

        category = analysis_results.get('category_analysis')
        if category:
            percentages = category['percentage']
            fig.add_trace(
                go.Pie(labels=list(percentages.keys()),
                       values=list(percentages.values()), name="类别分布"),
                row=1, col=1)

        trend = analysis_results.get('trend_analysis')
        if trend:
            weekly = trend['weekly_trend']
            fig.add_trace(
                go.Scatter(x=list(weekly.keys()), y=list(weekly.values()),
                           mode='lines', name='每周投诉量'),
                row=1, col=2)

        sentiment = analysis_results.get('sentiment_analysis')
        if sentiment:
            counts = sentiment['counts']
            fig.add_trace(
                go.Bar(x=list(counts.keys()), y=list(counts.values()),
                       name='情感分析'),
                row=2, col=1)

        # Cluster sizes shown as a bubble chart (simplified representation).
        clustering = analysis_results.get('clustering_analysis')
        if clustering and clustering['cluster_distribution']:
            clusters = clustering['cluster_distribution']
            cluster_labels = [f'聚类 {item[0]+1}' for item in clusters]
            cluster_sizes = [item[1] for item in clusters]
            fig.add_trace(
                go.Scatter(
                    x=cluster_labels, y=cluster_sizes, mode='markers',
                    marker=dict(size=cluster_sizes, sizemode='area',
                                sizeref=2. * max(cluster_sizes) / (40. ** 2)),
                    name='聚类分布'),
                row=2, col=2)

        fig.update_layout(height=800, width=1000, title_text="投诉工单分析仪表板")

        path = os.path.join(self.output_dir, filename)
        fig.write_html(path)
        return path
2.2.5 邮件通知模块
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import os
from datetime import datetime


class EmailNotifier:
    """Send HTML analysis reports with attachments over SMTP."""

    # Port on which servers expect TLS from the first byte (SMTPS).
    IMPLICIT_TLS_PORT = 465
    # Seconds to wait for the server, so a dead host cannot block forever.
    SMTP_TIMEOUT = 30

    def __init__(self, smtp_server, smtp_port, sender_email, sender_password):
        self.smtp_server = smtp_server
        self.smtp_port = smtp_port
        self.sender_email = sender_email
        self.sender_password = sender_password

    def send_email(self, recipient_emails, subject, body, attachments=None):
        """Send an HTML e-mail.

        Args:
            recipient_emails: List of recipient addresses.
            subject: Subject line.
            body: HTML body.
            attachments: Optional list of file paths; paths that do not exist
                are skipped.

        Returns:
            True on success, False on any failure (the error is printed).
        """
        try:
            msg = MIMEMultipart()
            msg['From'] = self.sender_email
            msg['To'] = ', '.join(recipient_emails)
            msg['Subject'] = subject
            msg.attach(MIMEText(body, 'html', 'utf-8'))

            for attachment_path in attachments or []:
                if os.path.exists(attachment_path):
                    msg.attach(self._build_attachment(attachment_path))

            # Port 465 speaks TLS immediately; other ports upgrade via STARTTLS.
            use_ssl = int(self.smtp_port) == self.IMPLICIT_TLS_PORT
            smtp_class = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP
            # The context manager closes the connection even when login or
            # sending fails (the original leaked the socket in that case).
            with smtp_class(self.smtp_server, self.smtp_port,
                            timeout=self.SMTP_TIMEOUT) as server:
                if not use_ssl:
                    server.starttls()
                server.login(self.sender_email, self.sender_password)
                server.sendmail(self.sender_email, recipient_emails, msg.as_string())

            print(f"邮件发送成功: {datetime.now()}")
            return True
        except Exception as e:
            print(f"邮件发送失败: {str(e)}")
            return False

    @staticmethod
    def _build_attachment(attachment_path):
        """Return a base64-encoded MIME part for the file at attachment_path."""
        part = MIMEBase('application', 'octet-stream')
        with open(attachment_path, 'rb') as file:
            part.set_payload(file.read())
        encoders.encode_base64(part)
        part.add_header(
            'Content-Disposition',
            f'attachment; filename="{os.path.basename(attachment_path)}"')
        return part

    def generate_email_content(self, analysis_results, report_date):
        """Build the HTML summary used as the e-mail body.

        Args:
            analysis_results: dict; reads 'total_complaints',
                'category_analysis' -> 'percentage' and
                'sentiment_analysis' -> 'percentage' -> '负面'. Missing
                entries fall back to 0 / '无数据'.
            report_date: Text shown as the report generation time.

        Returns:
            The HTML document as a string.
        """
        total_complaints = analysis_results.get('total_complaints', 0)

        # Category with the largest share.
        category_stats = analysis_results.get('category_analysis', {})
        main_category = max(category_stats.get('percentage', {}).items(),
                            key=lambda x: x[1], default=('无数据', 0))

        sentiment_stats = analysis_results.get('sentiment_analysis', {})
        negative_percentage = sentiment_stats.get('percentage', {}).get('负面', 0)

        html_content = f"""<html>
<head>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
.header {{ background-color: #f8f9fa; padding: 20px; border-radius: 5px; }}
.summary {{ margin: 20px 0; }}
.metric {{ background-color: #e9ecef; padding: 15px; border-radius: 5px; margin: 10px 0; }}
.alert {{ color: #dc3545; font-weight: bold; }}
</style>
</head>
<body>
<div class="header">
<h2>运营商投诉工单分析报告</h2>
<p>报告生成时间: {report_date}</p>
</div>
<div class="summary">
<h3>分析概要</h3>
<div class="metric">
<p>总投诉量: <strong>{total_complaints}</strong> 件</p>
<p>主要问题类别: <strong>{main_category[0]}</strong> ({main_category[1]}%)</p>
<p>负面情感占比: <strong>{negative_percentage}%</strong></p>
</div>
</div>
<div class="details">
<h3>详细分析</h3>
<p>本次分析涵盖了时间分布、问题类别、情感倾向和趋势预测等多个维度。</p>
<p>请查看附件获取完整分析报告和可视化图表。</p>
</div>
<div class="footer">
<p>此邮件由运营商投诉工单分析系统自动生成,请勿直接回复。</p>
</div>
</body>
</html>"""
        return html_content
2.2.6 任务调度模块
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
import datetime
import time


class TaskScheduler:
    """Thin wrapper around an APScheduler BackgroundScheduler for cron jobs."""

    def __init__(self):
        self.scheduler = BackgroundScheduler()
        # job_id -> Job handle for every job registered through this wrapper.
        self.jobs = {}

    def _add_cron_job(self, job_id, task_func, args, **cron_fields):
        """Register task_func under job_id with a cron trigger; return job_id."""
        job = self.scheduler.add_job(
            task_func,
            trigger=CronTrigger(**cron_fields),
            args=list(args) if args else [],
            id=job_id,
            # Scheduling the same slot again updates the job instead of
            # raising ConflictingIdError.
            replace_existing=True,
        )
        self.jobs[job_id] = job
        return job_id

    def schedule_daily_analysis(self, task_func, hour, minute, args=None):
        """Schedule task_func every day at hour:minute; return the job id."""
        job_id = f"daily_analysis_{hour}_{minute}"
        return self._add_cron_job(job_id, task_func, args, hour=hour, minute=minute)

    def schedule_weekly_analysis(self, task_func, day_of_week, hour, minute, args=None):
        """Schedule task_func weekly on day_of_week at hour:minute; return the job id."""
        job_id = f"weekly_analysis_{day_of_week}_{hour}_{minute}"
        return self._add_cron_job(job_id, task_func, args,
                                  day_of_week=day_of_week, hour=hour, minute=minute)

    def run_once(self, task_func, args=None):
        """Run task_func immediately in the calling thread and return its result."""
        return task_func(*(args or []))

    def start(self):
        """Start the background scheduler."""
        self.scheduler.start()
        print("任务调度器已启动")

    def shutdown(self):
        """Stop the scheduler; a no-op when it was never started."""
        # Shutting down a scheduler that is not running raises in APScheduler.
        if not self.scheduler.running:
            return
        self.scheduler.shutdown()
        print("任务调度器已关闭")

    def list_jobs(self):
        """Return all jobs currently known to the scheduler."""
        return self.scheduler.get_jobs()
2.3 主程序与用户界面
2.3.1 主程序
import sys
import pandas as pd
from datetime import datetime
import json


class ComplaintAnalysisSystem:
    """Facade wiring loading, cleaning, analysis, charts, e-mail and scheduling."""

    def __init__(self):
        self.data_input = DataInput()
        self.data_processor = DataProcessor()
        self.analysis_engine = AnalysisEngine()
        self.visualization = Visualization()
        self.email_notifier = None
        # SMTP settings the current notifier was built from (None = not built).
        self._notifier_settings = None
        self.task_scheduler = TaskScheduler()
        self.config = self.load_config()

    @staticmethod
    def _default_config():
        """Return the built-in configuration used when config.json is absent."""
        return {
            'smtp_server': 'smtp.example.com',
            'smtp_port': 587,
            'sender_email': 'system@example.com',
            # NOTE(review): placeholder only; the real credential ends up in
            # plain text in config.json.
            'sender_password': 'password',
            'recipient_emails': ['admin@example.com'],
            'data_columns': {
                'date_column': '投诉时间',
                'text_column': '投诉内容',
                'category_column': '投诉类别',
            },
        }

    def load_config(self):
        """Load config.json, filling every missing key from the defaults.

        Returns:
            The configuration dict. Defaults are used entirely when the file
            is missing or is not a valid JSON object.
        """
        config = self._default_config()
        try:
            with open('config.json', 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return config
        except json.JSONDecodeError as e:
            print(f"警告: 配置文件格式错误,使用默认配置: {str(e)}")
            return config
        if not isinstance(loaded, dict):
            return config

        # Merge so a partial file cannot cause KeyError later on.
        columns = dict(config['data_columns'])
        if isinstance(loaded.get('data_columns'), dict):
            columns.update(loaded['data_columns'])
        config.update(loaded)
        config['data_columns'] = columns
        return config

    def save_config(self):
        """Write the current configuration to config.json."""
        with open('config.json', 'w', encoding='utf-8') as f:
            json.dump(self.config, f, ensure_ascii=False, indent=4)

    def initialize_email_notifier(self):
        """Create the e-mail notifier, rebuilding it when SMTP settings changed."""
        settings = (
            self.config['smtp_server'],
            self.config['smtp_port'],
            self.config['sender_email'],
            self.config['sender_password'],
        )
        # Previously the notifier was created once and never refreshed, so
        # settings saved in the UI had no effect until restart.
        if (not self.email_notifier
                or getattr(self, '_notifier_settings', None) != settings):
            self.email_notifier = EmailNotifier(*settings)
            self._notifier_settings = settings

    def run_analysis(self, file_path):
        """Run the full pipeline: load, clean, analyse, visualise, report.

        Returns:
            On success: {'success': True, 'dataframe', 'analysis_results',
            'visualizations', 'report_path'}. On failure:
            {'success': False, 'error': message}.
        """
        try:
            print("开始分析投诉工单数据...")
            df = self.data_input.load_data(file_path)
            print(f"数据加载成功,共 {len(df)} 条记录")

            df = self._prepare_data(df)
            analysis_results = self._analyze(df)
            visualizations = self._visualize(df, analysis_results)
            report_path = self.generate_report(analysis_results, visualizations, df)

            print("分析完成!")
            return {
                'success': True,
                'dataframe': df,
                'analysis_results': analysis_results,
                'visualizations': visualizations,
                'report_path': report_path,
            }
        except Exception as e:
            print(f"分析过程中出错: {str(e)}")
            return {'success': False, 'error': str(e)}

    def _prepare_data(self, df):
        """Clean the frame, normalise the date column and assign categories."""
        columns = self.config['data_columns']
        df = self.data_processor.clean_data(df)
        if columns['date_column'] in df.columns:
            df = self.data_processor.standardize_dates(df, [columns['date_column']])
        if columns['text_column'] in df.columns:
            df = self.data_processor.categorize_complaints(df, columns['text_column'])
        return df

    def _analyze(self, df):
        """Run every analysis whose input column exists; return the results dict."""
        columns = self.config['data_columns']
        date_column = columns['date_column']
        text_column = columns['text_column']
        category_column = columns['category_column']
        engine = self.analysis_engine
        results = {}

        if date_column in df.columns:
            results['time_analysis'] = engine.time_analysis(df, date_column)
        if category_column in df.columns:
            results['category_analysis'] = engine.category_analysis(df, category_column)
        if text_column in df.columns:
            results['sentiment_analysis'] = engine.sentiment_analysis(df, text_column)
        # Re-check: an earlier step may have altered the frame's columns.
        if date_column in df.columns:
            results['trend_analysis'] = engine.trend_analysis(df, date_column)
        if text_column in df.columns:
            results['clustering_analysis'] = engine.text_clustering(df, text_column)

        results['total_complaints'] = len(df)
        return results

    def _visualize(self, df, analysis_results):
        """Render charts for the available results; return name -> file path."""
        vis = self.visualization
        visualizations = {}
        if 'time_analysis' in analysis_results:
            visualizations['time_analysis'] = vis.create_time_series_plot(
                analysis_results['time_analysis'], 'time_analysis.png')
        if 'category_analysis' in analysis_results:
            visualizations['category_analysis'] = vis.create_category_pie_chart(
                analysis_results['category_analysis'], 'category_analysis.png')
        if 'trend_analysis' in analysis_results:
            visualizations['trend_analysis'] = vis.create_trend_plot(
                analysis_results['trend_analysis'], 'trend_analysis.png')
        visualizations['dashboard'] = vis.create_interactive_dashboard(
            df, analysis_results, 'dashboard.html')
        return visualizations

    def generate_report(self, analysis_results, visualizations, df):
        """Write an HTML report into the visualization output directory.

        Image links are written relative to the report file, so they resolve
        when the report is opened from the output directory.

        Returns:
            Path of the written HTML report.
        """
        report_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        output_dir = self.visualization.output_dir

        def image_src(key):
            path = visualizations.get(key, '')
            if not path:
                return ''
            return os.path.relpath(path, output_dir).replace(os.sep, '/')

        html_report = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>运营商投诉工单分析报告</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; }}
.header {{ text-align: center; margin-bottom: 30px; }}
.section {{ margin-bottom: 30px; }}
.chart {{ text-align: center; margin: 20px 0; }}
img {{ max-width: 80%; height: auto; }}
table {{ width: 100%; border-collapse: collapse; margin: 15px 0; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background-color: #f2f2f2; }}
</style>
</head>
<body>
<div class="header">
<h1>运营商投诉工单分析报告</h1>
<p>生成时间: {report_date}</p>
<p>总投诉量: {analysis_results['total_complaints']} 件</p>
</div>
<div class="section">
<h2>1. 时间分布分析</h2>
<div class="chart"><img src="{image_src('time_analysis')}" alt="时间分布分析"></div>
</div>
<div class="section">
<h2>2. 问题类别分析</h2>
<div class="chart"><img src="{image_src('category_analysis')}" alt="类别分析"></div>
</div>
<div class="section">
<h2>3. 趋势分析</h2>
<div class="chart"><img src="{image_src('trend_analysis')}" alt="趋势分析"></div>
</div>
<div class="section">
<h2>4. 详细数据</h2>
<p>以下是前10条投诉记录:</p>
{df.head(10).to_html()}
</div>
</body>
</html>"""

        html_report_path = os.path.join(
            output_dir, f"analysis_report_{report_date[:10]}.html")
        with open(html_report_path, 'w', encoding='utf-8') as f:
            f.write(html_report)
        return html_report_path

    def send_analysis_report(self, analysis_results, visualizations, recipient_emails=None):
        """E-mail the summary with all visualization files attached.

        Args:
            recipient_emails: Overrides the configured recipients when given.

        Returns:
            True when the message was sent, False otherwise.
        """
        try:
            self.initialize_email_notifier()
            if recipient_emails is None:
                recipient_emails = self.config['recipient_emails']

            report_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            subject = f"运营商投诉工单分析报告 - {report_date[:10]}"
            body = self.email_notifier.generate_email_content(analysis_results, report_date)
            attachments = list(visualizations.values())
            return self.email_notifier.send_email(
                recipient_emails, subject, body, attachments)
        except Exception as e:
            print(f"发送邮件失败: {str(e)}")
            return False

    def schedule_automatic_analysis(self, file_path, schedule_type='daily', time_str='09:00'):
        """Schedule a recurring analysis of file_path.

        Args:
            schedule_type: 'daily' or 'weekly' (weekly runs on Monday).
            time_str: Time of day as 'HH:MM'.

        Returns:
            The job id, or None when scheduling failed.
        """
        try:
            hour, minute = map(int, time_str.split(':'))
            if schedule_type == 'daily':
                job_id = self.task_scheduler.schedule_daily_analysis(
                    self.run_scheduled_analysis, hour, minute, [file_path])
            elif schedule_type == 'weekly':
                job_id = self.task_scheduler.schedule_weekly_analysis(
                    self.run_scheduled_analysis, 'mon', hour, minute, [file_path])
            else:
                raise ValueError("不支持的调度类型")
            print(f"已安排{schedule_type}分析任务,任务ID: {job_id}")
            return job_id
        except Exception as e:
            print(f"安排任务失败: {str(e)}")
            return None

    def run_scheduled_analysis(self, file_path):
        """Job body: analyse file_path and, on success, e-mail the report."""
        print(f"开始执行计划分析任务: {datetime.now()}")
        result = self.run_analysis(file_path)
        if result['success']:
            self.send_analysis_report(result['analysis_results'], result['visualizations'])
            print("计划分析任务完成并已发送邮件")
        else:
            print(f"计划分析任务失败: {result['error']}")

    def start_scheduler(self):
        """Start the task scheduler."""
        self.task_scheduler.start()

    def stop_scheduler(self):
        """Stop the task scheduler."""
        self.task_scheduler.shutdown()
2.3.2 用户界面
import sys
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QPushButton, QTextEdit, QFileDialog, QTabWidget, QGroupBox, QComboBox, QSpinBox, QTimeEdit,QListWidget, QMessageBox, QProgressBar)
from PyQt5.QtCore import QTimer, Qt
from PyQt5.QtGui import QFont, QPixmap
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
# QTime is needed for the schedule time editor and was never imported.
from PyQt5.QtCore import QTime


class MainWindow(QMainWindow):
    """Main window with tabs for analysis, scheduled tasks and settings."""

    FILE_FILTER = "支持的文件格式 (*.csv *.xlsx *.xls)"

    def __init__(self):
        super().__init__()
        self.system = ComplaintAnalysisSystem()
        # Result of the most recent analysis run; None until one has finished.
        self.last_analysis_result = None
        self.initUI()

    def initUI(self):
        """Build the window, its three tabs and start the task scheduler."""
        self.setWindowTitle('运营商投诉工单分析系统')
        self.setGeometry(100, 100, 1000, 700)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        tab_widget = QTabWidget()
        main_layout.addWidget(tab_widget)
        for title, setup in (("数据分析", self.setup_analysis_tab),
                             ("计划任务", self.setup_scheduling_tab),
                             ("系统设置", self.setup_settings_tab)):
            tab = QWidget()
            setup(tab)
            tab_widget.addTab(tab, title)

        self.statusBar().showMessage('就绪')
        self.system.start_scheduler()

    def setup_analysis_tab(self, tab):
        """Build the tab holding file selection, run button and result view."""
        layout = QVBoxLayout(tab)

        file_group = QGroupBox("数据文件")
        file_layout = QHBoxLayout(file_group)
        self.file_path_edit = QLineEdit()
        file_browse_btn = QPushButton("浏览...")
        file_browse_btn.clicked.connect(self.browse_file)
        file_layout.addWidget(QLabel("文件路径:"))
        file_layout.addWidget(self.file_path_edit)
        file_layout.addWidget(file_browse_btn)
        layout.addWidget(file_group)

        analyze_btn = QPushButton("开始分析")
        analyze_btn.clicked.connect(self.run_analysis)
        layout.addWidget(analyze_btn)

        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)
        layout.addWidget(self.progress_bar)

        result_group = QGroupBox("分析结果")
        result_layout = QVBoxLayout(result_group)
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        result_layout.addWidget(self.result_text)

        email_btn = QPushButton("发送邮件报告")
        email_btn.clicked.connect(self.send_email_report)
        result_layout.addWidget(email_btn)
        layout.addWidget(result_group)

    def setup_scheduling_tab(self, tab):
        """Build the tab for creating and listing scheduled analysis jobs."""
        layout = QVBoxLayout(tab)

        schedule_group = QGroupBox("计划任务设置")
        schedule_layout = QVBoxLayout(schedule_group)

        type_layout = QHBoxLayout()
        type_layout.addWidget(QLabel("任务类型:"))
        self.schedule_type = QComboBox()
        self.schedule_type.addItems(["每日", "每周"])
        type_layout.addWidget(self.schedule_type)
        type_layout.addStretch()
        schedule_layout.addLayout(type_layout)

        time_layout = QHBoxLayout()
        time_layout.addWidget(QLabel("执行时间:"))
        self.schedule_time = QTimeEdit()
        self.schedule_time.setTime(QTime(9, 0))  # default 09:00
        time_layout.addWidget(self.schedule_time)
        time_layout.addStretch()
        schedule_layout.addLayout(time_layout)

        file_layout = QHBoxLayout()
        file_layout.addWidget(QLabel("数据文件:"))
        self.schedule_file_edit = QLineEdit()
        schedule_file_btn = QPushButton("浏览...")
        schedule_file_btn.clicked.connect(self.browse_schedule_file)
        file_layout.addWidget(self.schedule_file_edit)
        file_layout.addWidget(schedule_file_btn)
        schedule_layout.addLayout(file_layout)

        add_schedule_btn = QPushButton("添加计划任务")
        add_schedule_btn.clicked.connect(self.add_scheduled_task)
        schedule_layout.addWidget(add_schedule_btn)
        layout.addWidget(schedule_group)

        tasks_group = QGroupBox("现有计划任务")
        tasks_layout = QVBoxLayout(tasks_group)
        self.tasks_list = QListWidget()
        tasks_layout.addWidget(self.tasks_list)
        refresh_btn = QPushButton("刷新任务列表")
        refresh_btn.clicked.connect(self.refresh_tasks_list)
        tasks_layout.addWidget(refresh_btn)
        layout.addWidget(tasks_group)

    @staticmethod
    def _add_labeled_row(parent_layout, label_text, widget):
        """Append a 'label + widget' row to parent_layout."""
        row = QHBoxLayout()
        row.addWidget(QLabel(label_text))
        row.addWidget(widget)
        parent_layout.addLayout(row)

    def setup_settings_tab(self, tab):
        """Build the tab for SMTP and recipient settings."""
        config = self.system.config
        layout = QVBoxLayout(tab)

        smtp_group = QGroupBox("邮件服务器设置")
        smtp_layout = QVBoxLayout(smtp_group)
        self.smtp_server_edit = QLineEdit(config['smtp_server'])
        self._add_labeled_row(smtp_layout, "SMTP服务器:", self.smtp_server_edit)
        self.smtp_port_edit = QLineEdit(str(config['smtp_port']))
        self._add_labeled_row(smtp_layout, "端口:", self.smtp_port_edit)
        self.sender_email_edit = QLineEdit(config['sender_email'])
        self._add_labeled_row(smtp_layout, "发件邮箱:", self.sender_email_edit)
        self.sender_password_edit = QLineEdit(config['sender_password'])
        self.sender_password_edit.setEchoMode(QLineEdit.Password)
        self._add_labeled_row(smtp_layout, "密码/授权码:", self.sender_password_edit)
        layout.addWidget(smtp_group)

        recipients_group = QGroupBox("收件人设置")
        recipients_layout = QVBoxLayout(recipients_group)
        self.recipients_edit = QLineEdit(','.join(config['recipient_emails']))
        self._add_labeled_row(recipients_layout, "收件人邮箱(多个用逗号分隔):",
                              self.recipients_edit)
        layout.addWidget(recipients_group)

        save_btn = QPushButton("保存设置")
        save_btn.clicked.connect(self.save_settings)
        layout.addWidget(save_btn)

    def _pick_file_into(self, line_edit):
        """Show a file dialog and put the chosen path into line_edit."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择数据文件", "", self.FILE_FILTER)
        if file_path:
            line_edit.setText(file_path)

    def browse_file(self):
        """Choose the data file for an immediate analysis."""
        self._pick_file_into(self.file_path_edit)

    def browse_schedule_file(self):
        """Choose the data file for a scheduled analysis."""
        self._pick_file_into(self.schedule_file_edit)

    def run_analysis(self):
        """Validate the chosen file and start the analysis."""
        file_path = self.file_path_edit.text()
        if not file_path:
            QMessageBox.warning(self, "警告", "请先选择数据文件")
            return

        self.progress_bar.setVisible(True)
        self.progress_bar.setRange(0, 0)  # indeterminate progress
        # The short delay lets the progress bar paint first. The analysis
        # itself still runs on the GUI thread, so the window is unresponsive
        # until it finishes.
        QTimer.singleShot(100, lambda: self.execute_analysis(file_path))

    def execute_analysis(self, file_path):
        """Run the analysis and show a summary in the result view."""
        try:
            result = self.system.run_analysis(file_path)
            if not result['success']:
                self.result_text.setText(f"分析失败: {result['error']}")
                return

            analysis = result['analysis_results']
            self.result_text.setText("分析完成!\n\n")
            self.result_text.append(f"总投诉量: {analysis['total_complaints']} 条\n")
            if 'category_analysis' in analysis:
                self.result_text.append("问题类别分布:\n")
                for category, percentage in analysis['category_analysis']['percentage'].items():
                    self.result_text.append(f" {category}: {percentage}%")
            # Keep the result for "send e-mail report".
            self.last_analysis_result = result
        except Exception as e:
            self.result_text.setText(f"分析过程中出错: {str(e)}")
        finally:
            self.progress_bar.setVisible(False)

    def send_email_report(self):
        """E-mail the report of the last successful analysis."""
        result = self.last_analysis_result
        if not result or not result['success']:
            QMessageBox.warning(self, "警告", "请先完成分析")
            return
        try:
            success = self.system.send_analysis_report(
                result['analysis_results'], result['visualizations'])
            if success:
                QMessageBox.information(self, "成功", "邮件发送成功")
            else:
                QMessageBox.warning(self, "警告", "邮件发送失败")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"发送邮件时出错: {str(e)}")

    def add_scheduled_task(self):
        """Create a daily/weekly job from the scheduling form."""
        file_path = self.schedule_file_edit.text()
        if not file_path:
            QMessageBox.warning(self, "警告", "请先选择数据文件")
            return

        schedule_type = 'daily' if self.schedule_type.currentText() == '每日' else 'weekly'
        time_str = self.schedule_time.time().toString('HH:mm')
        job_id = self.system.schedule_automatic_analysis(file_path, schedule_type, time_str)
        if job_id:
            QMessageBox.information(self, "成功", f"已添加计划任务: {job_id}")
            self.refresh_tasks_list()
        else:
            QMessageBox.warning(self, "警告", "添加计划任务失败")

    def refresh_tasks_list(self):
        """Reload the list of scheduled jobs from the scheduler."""
        self.tasks_list.clear()
        for job in self.system.task_scheduler.list_jobs():
            self.tasks_list.addItem(f"{job.id} - 下次执行: {job.next_run_time}")

    def save_settings(self):
        """Copy the form values into the config, persist and apply them."""
        try:
            config = self.system.config
            config['smtp_server'] = self.smtp_server_edit.text()
            config['smtp_port'] = int(self.smtp_port_edit.text())
            config['sender_email'] = self.sender_email_edit.text()
            config['sender_password'] = self.sender_password_edit.text()
            # Ignore empty entries such as a trailing comma.
            config['recipient_emails'] = [
                email.strip()
                for email in self.recipients_edit.text().split(',')
                if email.strip()
            ]
            self.system.save_config()
            # Drop the cached notifier so the new SMTP settings take effect.
            self.system.email_notifier = None
            self.system.initialize_email_notifier()
            QMessageBox.information(self, "成功", "设置已保存")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"保存设置时出错: {str(e)}")

    def closeEvent(self, event):
        """Stop the task scheduler before the window closes."""
        self.system.stop_scheduler()
        event.accept()


def main():
    """Create the Qt application, show the main window and run the event loop."""
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()
3. 系统部署与使用
3.1 环境配置
- 安装Python 3.8或更高版本
- 安装所需依赖库:
pip install pandas numpy scikit-learn statsmodels matplotlib seaborn plotly jieba PyQt5 apscheduler openpyxl xlrd
3.2 配置文件
在程序运行目录下创建 config.json 文件,用于配置系统参数:
{"smtp_server": "smtp.qq.com","smtp_port": 587,"sender_email": "your_email@qq.com","sender_password": "your_authorization_code","recipient_emails": ["recipient1@example.com", "recipient2@example.com"],"data_columns": {"date_column": "投诉时间","text_column": "投诉内容","category_column": "投诉类别"}
}
3.3 数据格式要求
系统支持CSV和Excel格式的数据文件,数据应包含以下基本字段:
- 投诉时间:日期时间格式
- 投诉内容:文本格式
- 客户信息:可选
- 处理状态:可选
3.4 使用流程
- 启动系统:在命令行中运行 python main.py
- 在"数据分析"标签页选择数据文件并执行分析
- 查看分析结果并可选择发送邮件报告
- 在"计划任务"标签页设置定期自动分析任务
- 在"系统设置"标签页配置邮件服务器和收件人
4. 系统测试与优化
4.1 功能测试
对系统的各个模块进行详细测试,包括:
- 数据加载和处理功能
- 分析算法准确性
- 可视化图表生成
- 邮件发送功能
- 任务调度功能
4.2 性能测试
测试系统处理不同规模数据时的性能表现:
- 小规模数据(<1,000条)
- 中等规模数据(1,000-10,000条)
- 大规模数据(>10,000条)
4.3 优化策略
根据测试结果进行系统优化:
- 数据预处理优化
- 算法效率提升
- 内存使用优化
- 并行处理实现
5. 总结与展望
5.1 系统总结
本系统实现了运营商投诉工单的自动化分析,具有以下特点:
- 完整的数据处理和分析流程
- 多维度分析结果和可视化展示
- 自动邮件报告功能
- 友好的图形用户界面
- 灵活的任务调度机制
5.2 未来扩展
系统可进一步扩展以下功能:
- 集成机器学习模型进行预测分析
- 支持更多数据源和数据库连接
- 增加实时数据监控和分析
- 提供API接口供其他系统调用
- 增加多语言支持
5.3 实际应用价值
本系统可帮助运营商:
- 快速识别问题趋势和热点问题
- 提高客户投诉处理效率
- 优化服务质量和客户满意度
- 数据驱动的决策支持
通过本系统的实施,运营商可以更加高效地处理和分析客户投诉工单,提升服务质量,增强客户满意度,从而在竞争激烈的电信市场中保持竞争优势。