用python实现汉字转拼音工具
用python实现汉字转拼音工具
主要功能特点:
- 多种拼音风格选择(带声调符号、数字声调、无声调)
- 输出模式:可以选择“普通模式”(仅拼音)或“拼音注音”(每个汉字的拼音显示在上方)
- 可自定义分隔符,可以自定义拼音之间的分隔符(默认空格)
- 标点符号保留选项,关闭时会自动过滤非汉字字符
- 支持文件导入/导出(文本文件)
- 一键复制结果到剪贴板
基本实现
程序需要你安装pypinyin和pyperclip这两个第三方库/模块(若你未安装的话)。
pypinyin:用于将汉字转换为拼音。
pyperclip:跨平台的剪贴板操作库,可以用来复制和粘贴文本到系统的剪贴板。
还用到了Python 的一些标准库,它们通常不需要安装,一般会随 Python 一起安装。
运行界面如下:
源码如下:
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, font
from pypinyin import pinyin, Style
import pyperclip
import os
import reclass PinyinConverter:def __init__(self, root):self.root = rootself.root.title("汉字转拼音工具")self.setup_ui()self.set_default_values()def set_default_values(self):self.style_var.set('带声调')self.separator_var.set(' ')#self.handle_polyphonic_var.set(0)self.keep_punctuation_var.set(1)self.output_mode_var.set('普通模式')def setup_ui(self):# 输入区域input_frame = ttk.LabelFrame(self.root, text="输入文本")input_frame.grid(row=0, column=0, padx=10, pady=5, sticky='nsew')self.input_text = tk.Text(input_frame, height=10, width=50, wrap='word')self.input_text.pack(padx=5, pady=5, fill='both', expand=True)# 控制面板control_frame = ttk.Frame(self.root)control_frame.grid(row=1, column=0, padx=10, pady=5, sticky='ew')# 样式设置style_frame = ttk.LabelFrame(control_frame, text="转换设置")style_frame.pack(side=tk.LEFT, padx=5, pady=2)# 拼音样式ttk.Label(style_frame, text="拼音风格:").grid(row=0, column=0, sticky='w')self.style_var = tk.StringVar()styles = {'带声调': Style.TONE,'数字声调': Style.TONE3,'无声调': Style.NORMAL}self.style_combobox = ttk.Combobox(style_frame, textvariable=self.style_var,values=list(styles.keys()), state='readonly')self.style_combobox.grid(row=0, column=1, padx=2)# 输出模式ttk.Label(style_frame, text="输出模式:").grid(row=1, column=0, sticky='w')self.output_mode_var = tk.StringVar()modes = ['普通模式', '拼音注音']self.mode_combobox = ttk.Combobox(style_frame, textvariable=self.output_mode_var,values=modes, state='readonly')self.mode_combobox.grid(row=1, column=1, padx=2)# 分隔符ttk.Label(style_frame, text="分隔符:").grid(row=2, column=0, sticky='w')self.separator_var = tk.StringVar()ttk.Entry(style_frame, textvariable=self.separator_var, width=3).grid(row=2, column=1, sticky='w')# 高级选项
## self.handle_polyphonic_var = tk.IntVar()
## ttk.Checkbutton(
## style_frame, text="处理多音字",
## variable=self.handle_polyphonic_var).grid(row=3, column=0, columnspan=2, sticky='w')self.keep_punctuation_var = tk.IntVar()ttk.Checkbutton(style_frame, text="保留标点",variable=self.keep_punctuation_var).grid(row=4, column=0, columnspan=2, sticky='w')# 操作按钮btn_frame = ttk.Frame(control_frame)btn_frame.pack(side=tk.RIGHT, padx=5)ttk.Button(btn_frame, text="转换", command=self.convert).pack(side=tk.TOP, fill=tk.X)ttk.Button(btn_frame, text="清空", command=self.clear_text).pack(side=tk.TOP, fill=tk.X, pady=2)ttk.Button(btn_frame, text="复制结果", command=self.copy_result).pack(side=tk.TOP, fill=tk.X)ttk.Button(btn_frame, text="导入文件", command=self.import_file).pack(side=tk.TOP, fill=tk.X, pady=2)ttk.Button(btn_frame, text="导出结果", command=self.export_result).pack(side=tk.TOP, fill=tk.X)# 输出区域output_frame = ttk.LabelFrame(self.root, text="拼音结果")output_frame.grid(row=2, column=0, padx=10, pady=5, sticky='nsew')self.output_text = tk.Text(output_frame, height=10, width=50, wrap='word')self.output_text.pack(padx=5, pady=5, fill='both', expand=True)# 配置标签和字体# 配置标签和字体 - 修正这里的font导入问题self.pinyin_font = font.Font(family="Arial", size=10) # 使用正确导入的fontself.hanzi_font = font.Font(family="SimSun", size=12)self.output_text.tag_configure("pinyin", font=self.pinyin_font, foreground="blue")self.output_text.tag_configure("hanzi", font=self.hanzi_font)# 布局配置self.root.columnconfigure(0, weight=1)self.root.rowconfigure(0, weight=1)self.root.rowconfigure(2, weight=1)def convert(self):try:input_str = self.input_text.get("1.0", tk.END).strip()if not input_str:returnstyle_mapping = {'带声调': Style.TONE,'数字声调': Style.TONE3,'无声调': Style.NORMAL}style = style_mapping[self.style_var.get()]separator = self.separator_var.get()keep_punctuation = bool(self.keep_punctuation_var.get())output_mode = self.output_mode_var.get()self.output_text.delete(1.0, tk.END)if output_mode == '普通模式':# 普通模式:直接输出拼音pinyin_list = pinyin(input_str,style=style,# heteronym=bool(self.handle_polyphonic_var.get()),errors='ignore' if not keep_punctuation else lambda x: x)result = []for word in pinyin_list:selected = word[0] if word else ''result.append(selected + separator)self.output_text.insert(tk.END, ''.join(result).strip())else:# 拼音注音模式# 将文本分割成连续的汉字段落和非汉字段落segments = []current_segment = ""current_type = None # 0 for non-Chinese, 1 for Chinesefor char in input_str:is_chinese = '\u4e00' <= char <= '\u9fff'char_type = 1 if is_chinese else 0if current_type is None:current_type = char_typecurrent_segment = charelif current_type == char_type:current_segment += charelse:segments.append((current_segment, current_type))current_segment = charcurrent_type = char_typeif current_segment:segments.append((current_segment, current_type))# 处理每个段落for segment_text, is_chinese in segments:if is_chinese:# 处理汉字段落py_results = pinyin(segment_text,style=style,# heteronym=bool(self.handle_polyphonic_var.get()))# 创建拼音行py_line = ""for py in py_results:py_text = py[0] if py else ''py_line += py_text + " "# 创建汉字行,确保每个汉字有足够空间对应拼音hz_line = ""for char in segment_text:hz_line += char + " "# 插入拼音和汉字self.output_text.insert(tk.END, py_line + "\n", "pinyin")self.output_text.insert(tk.END, hz_line + "\n", "hanzi")else:# 处理非汉字段落self.output_text.insert(tk.END, segment_text + "\n")except Exception as e:import tracebackerror_details = traceback.format_exc()messagebox.showerror("错误", f"转换失败: {str(e)}\n\n详细信息:\n{error_details}")def clear_text(self):self.input_text.delete(1.0, tk.END)self.output_text.delete(1.0, tk.END)def copy_result(self):result = self.output_text.get(1.0, tk.END).strip()if result:pyperclip.copy(result)messagebox.showinfo("成功", "已复制到剪贴板")def import_file(self):file_path = filedialog.askopenfilename(filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])if file_path:try:with open(file_path, 'r', encoding='utf-8') as f:content = f.read()self.input_text.delete(1.0, tk.END)self.input_text.insert(tk.END, content)except Exception as e:messagebox.showerror("错误", f"文件读取失败: {str(e)}")def export_result(self):result = self.output_text.get(1.0, tk.END).strip()if not result:returnfile_path = filedialog.asksaveasfilename(defaultextension=".txt",filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])if file_path:try:with open(file_path, 'w', encoding='utf-8') as f:f.write(result)messagebox.showinfo("成功", "文件保存成功")except Exception as e:messagebox.showerror("错误", f"文件保存失败: {str(e)}")if __name__ == "__main__":root = tk.Tk()app = PinyinConverter(root)root.mainloop()
改进版
在“拼音注音”模式下,拼音和汉字之间的对齐处理计较困难。
使用HTML5来实现汉字转拼音工具(特别是拼音注音功能)更加容易和灵活,浏览器中的渲染效果更好。
这个改进版方案,当用户选择"拼音注音"模式并点击转换后,程序会生成HTML文件并保存到临时目录。用户可以点击"在浏览器中查看"按钮来查看渲染效果。结果也会显示在文本区域中(以HTML源码形式)。
这种方法避免了在Tkinter窗口中嵌入复杂渲染引擎的问题,充分利用了系统浏览器的强大功能,同时保持了程序的简单性和稳定性。
运行界面如下:
源码如下:
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from pypinyin import pinyin, Style
import pyperclip
import os
import re
import webbrowser
import tempfileclass PinyinConverter:def __init__(self, root):self.root = rootself.root.title("汉字转拼音工具")self.setup_ui()self.set_default_values()self.temp_html_file = Nonedef set_default_values(self):self.style_var.set('带声调')self.separator_var.set(' ')# self.handle_polyphonic_var.set(0)self.keep_punctuation_var.set(1)self.output_mode_var.set('普通模式')def setup_ui(self):# 输入区域input_frame = ttk.LabelFrame(self.root, text="输入文本")input_frame.grid(row=0, column=0, padx=10, pady=5, sticky='nsew')self.input_text = tk.Text(input_frame, height=10, width=50, wrap='word')self.input_text.pack(padx=5, pady=5, fill='both', expand=True)# 控制面板control_frame = ttk.Frame(self.root)control_frame.grid(row=1, column=0, padx=10, pady=5, sticky='ew')# 样式设置style_frame = ttk.LabelFrame(control_frame, text="转换设置")style_frame.pack(side=tk.LEFT, padx=5, pady=2)# 拼音样式ttk.Label(style_frame, text="拼音风格:").grid(row=0, column=0, sticky='w')self.style_var = tk.StringVar()styles = {'带声调': Style.TONE,'数字声调': Style.TONE3,'无声调': Style.NORMAL}self.style_combobox = ttk.Combobox(style_frame, textvariable=self.style_var,values=list(styles.keys()), state='readonly')self.style_combobox.grid(row=0, column=1, padx=2)# 输出模式ttk.Label(style_frame, text="输出模式:").grid(row=1, column=0, sticky='w')self.output_mode_var = tk.StringVar()modes = ['普通模式', '拼音注音']self.mode_combobox = ttk.Combobox(style_frame, textvariable=self.output_mode_var,values=modes, state='readonly')self.mode_combobox.grid(row=1, column=1, padx=2)# 分隔符ttk.Label(style_frame, text="分隔符:").grid(row=2, column=0, sticky='w')self.separator_var = tk.StringVar()ttk.Entry(style_frame, textvariable=self.separator_var, width=3).grid(row=2, column=1, sticky='w')# 高级选项
## self.handle_polyphonic_var = tk.IntVar()
## ttk.Checkbutton(
## style_frame, text="处理多音字",
## variable=self.handle_polyphonic_var).grid(row=3, column=0, columnspan=2, sticky='w')self.keep_punctuation_var = tk.IntVar()ttk.Checkbutton(style_frame, text="保留标点",variable=self.keep_punctuation_var).grid(row=4, column=0, columnspan=2, sticky='w')# 操作按钮btn_frame = ttk.Frame(control_frame)btn_frame.pack(side=tk.RIGHT, padx=5)ttk.Button(btn_frame, text="转换", command=self.convert).pack(side=tk.TOP, fill=tk.X)ttk.Button(btn_frame, text="清空", command=self.clear_text).pack(side=tk.TOP, fill=tk.X, pady=2)ttk.Button(btn_frame, text="复制结果", command=self.copy_result).pack(side=tk.TOP, fill=tk.X)ttk.Button(btn_frame, text="导入文件", command=self.import_file).pack(side=tk.TOP, fill=tk.X, pady=2)ttk.Button(btn_frame, text="导出结果", command=self.export_result).pack(side=tk.TOP, fill=tk.X)ttk.Button(btn_frame, text="在浏览器中查看", command=self.view_in_browser).pack(side=tk.TOP, fill=tk.X, pady=2)# 输出区域output_frame = ttk.LabelFrame(self.root, text="拼音结果")output_frame.grid(row=2, column=0, padx=10, pady=5, sticky='nsew')self.output_text = tk.Text(output_frame, height=10, width=50, wrap='word')self.output_text.pack(padx=5, pady=5, fill='both', expand=True)# 布局配置self.root.columnconfigure(0, weight=1)self.root.rowconfigure(0, weight=1)self.root.rowconfigure(2, weight=1)def convert(self):try:input_str = self.input_text.get("1.0", tk.END).strip()if not input_str:returnstyle_mapping = {'带声调': Style.TONE,'数字声调': Style.TONE3,'无声调': Style.NORMAL}style = style_mapping[self.style_var.get()]separator = self.separator_var.get()keep_punctuation = bool(self.keep_punctuation_var.get())output_mode = self.output_mode_var.get()self.output_text.delete(1.0, tk.END)if output_mode == '普通模式':# 普通模式:直接输出拼音pinyin_list = pinyin(input_str,style=style,# heteronym=bool(self.handle_polyphonic_var.get()),errors='ignore' if not keep_punctuation else lambda x: x)result = []for word in pinyin_list:selected = word[0] if word else ''result.append(selected + separator)result_text = ''.join(result).strip()self.output_text.insert(tk.END, result_text)else:# 拼音注音模式:生成HTMLhtml_content = self.generate_html_with_pinyin(input_str, style, # handle_polyphonic=bool(self.handle_polyphonic_var.get()))# 在文本框中显示HTML源码self.output_text.insert(tk.END, html_content)# 保存HTML到临时文件,准备用浏览器查看if self.temp_html_file:try:os.unlink(self.temp_html_file)except:passfd, self.temp_html_file = tempfile.mkstemp(suffix='.html')with os.fdopen(fd, 'w', encoding='utf-8') as f:f.write(html_content)# 自动在浏览器中打开查看效果messagebox.showinfo("提示", "HTML已生成,点击“在浏览器中查看”按钮可以打开浏览器查看效果")except Exception as e:import tracebackerror_details = traceback.format_exc()messagebox.showerror("错误", f"转换失败: {str(e)}\n\n详细信息:\n{error_details}")def generate_html_with_pinyin(self, text, style, handle_polyphonic=False):"""生成带有拼音注音的HTML文档"""html_content = ['<!DOCTYPE html>','<html lang="zh-CN">','<head>','<meta charset="UTF-8">','<title>汉字拼音注音</title>','<style>','body {',' font-family: "Microsoft YaHei", SimSun, sans-serif;',' line-height: 2;',' margin: 20px;',' background-color: #f9f9f9;','}','ruby {',' display: inline-flex;',' flex-direction: column-reverse;',' text-align: center;',' margin: 0 2px;','}','rt {',' font-size: 0.7em;',' color: #0066cc;',' line-height: 1.2;',' text-align: center;',' font-weight: normal;','}','.container {',' background-color: white;',' padding: 20px;',' border-radius: 5px;',' box-shadow: 0 2px 5px rgba(0,0,0,0.1);',' max-width: 800px;',' margin: 0 auto;','}','.non-chinese {',' display: inline-block;','}','</style>','</head>','<body>','<div class="container">']# 将文本分割成连续的汉字段落和非汉字段落segments = []current_segment = ""current_type = None # 0 for non-Chinese, 1 for Chinesefor char in text:is_chinese = '\u4e00' <= char <= '\u9fff'char_type = 1 if is_chinese else 0if current_type is None:current_type = char_typecurrent_segment = charelif current_type == char_type:current_segment += charelse:segments.append((current_segment, current_type))current_segment = charcurrent_type = char_typeif current_segment:segments.append((current_segment, current_type))# 处理每个段落for segment_text, is_chinese in segments:if is_chinese:# 处理汉字段落py_results = pinyin(segment_text,style=style,# heteronym=handle_polyphonic)# 为每个汉字创建ruby标签for i, (char, py) in enumerate(zip(segment_text, py_results)):py_text = py[0] if py else ''html_content.append(f'<ruby>{char}<rt>{py_text}</rt></ruby>')else:# 处理非汉字段落html_content.append(f'<span class="non-chinese">{segment_text}</span>')# 完成HTMLhtml_content.extend(['</div>','</body>','</html>'])return '\n'.join(html_content)def view_in_browser(self):"""在浏览器中打开HTML文件"""if not self.temp_html_file or not os.path.exists(self.temp_html_file):if self.output_mode_var.get() == '拼音注音':self.convert() # 重新生成HTMLelse:messagebox.showinfo("提示", "请先切换到拼音注音模式并执行转换")returnif self.temp_html_file and os.path.exists(self.temp_html_file):webbrowser.open('file://' + os.path.abspath(self.temp_html_file))else:messagebox.showerror("错误", "HTML文件不存在或无法访问")def clear_text(self):self.input_text.delete(1.0, tk.END)self.output_text.delete(1.0, tk.END)# 删除临时HTML文件if self.temp_html_file and os.path.exists(self.temp_html_file):try:os.unlink(self.temp_html_file)self.temp_html_file = Noneexcept:passdef copy_result(self):result = self.output_text.get(1.0, tk.END).strip()if result:pyperclip.copy(result)messagebox.showinfo("成功", "已复制到剪贴板")def import_file(self):file_path = filedialog.askopenfilename(filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])if file_path:try:with open(file_path, 'r', encoding='utf-8') as f:content = f.read()self.input_text.delete(1.0, tk.END)self.input_text.insert(tk.END, content)except Exception as e:messagebox.showerror("错误", f"文件读取失败: {str(e)}")def export_result(self):result = self.output_text.get(1.0, tk.END).strip()if not result:returnif self.output_mode_var.get() == '拼音注音':default_ext = ".html"filetypes = [("HTML文件", "*.html"), ("所有文件", "*.*")]else:default_ext = ".txt"filetypes = [("文本文件", "*.txt"), ("所有文件", "*.*")]file_path = filedialog.asksaveasfilename(defaultextension=default_ext,filetypes=filetypes)if file_path:try:with open(file_path, 'w', encoding='utf-8') as f:f.write(result)messagebox.showinfo("成功", "文件保存成功")except Exception as e:messagebox.showerror("错误", f"文件保存失败: {str(e)}")if __name__ == "__main__":root = tk.Tk()app = PinyinConverter(root)root.mainloop()
特别说明,需要将拼音添加到汉字上面时,用python实现比HTML5+JavaScript实现繁琐。在下一篇博文中用HTML5+JavaScript实现汉字转拼音工具。