当前位置：首页 > news >正文

用python实现汉字转拼音工具

news 2025/7/12 20:01:12

用python实现汉字转拼音工具

主要功能特点：

多种拼音风格选择（带声调符号、数字声调、无声调）
输出模式：可以选择“普通模式”（仅拼音）或“拼音注音”（每个汉字的拼音显示在上方）
可自定义拼音之间的分隔符（默认为空格）
标点符号保留选项，关闭时会自动过滤非汉字字符
支持文件导入/导出（文本文件）
一键复制结果到剪贴板

基本实现

程序需要你安装pypinyin和pyperclip这两个第三方库/模块（若你未安装的话）。

pypinyin：用于将汉字转换为拼音。

pyperclip：跨平台的剪贴板操作库，可以用来复制和粘贴文本到系统的剪贴板。

还用到了Python 的一些标准库，它们通常不需要安装，一般会随 Python 一起安装。

运行界面如下：

源码如下：

import tkinter as tk
from tkinter import ttk, filedialog, messagebox, font  
from pypinyin import pinyin, Style
import pyperclip
import os
import re


class PinyinConverter:
    """Tkinter window that converts Chinese text to pinyin.

    Two output modes are offered: a plain mode that emits the pinyin
    syllables joined by a user-chosen separator, and an annotated mode that
    prints a pinyin line above every run of Chinese characters.
    """

    # Combobox label -> name of the pypinyin ``Style`` member it selects.
    # A single table keeps the combobox and the converter from drifting apart.
    STYLE_NAMES = {'带声调': 'TONE', '数字声调': 'TONE3', '无声调': 'NORMAL'}
    OUTPUT_MODES = ('普通模式', '拼音注音')

    # Either a run of characters in U+4E00..U+9FFF or a run of anything else.
    _SEGMENT_RE = re.compile(r'[\u4e00-\u9fff]+|[^\u4e00-\u9fff]+')

    def __init__(self, root):
        """Build the widgets inside *root* and apply the default settings."""
        self.root = root
        self.root.title("汉字转拼音工具")
        self.setup_ui()
        self.set_default_values()

    def set_default_values(self):
        """Reset every option widget to its initial value."""
        self.style_var.set('带声调')
        self.separator_var.set(' ')
        self.keep_punctuation_var.set(1)
        self.output_mode_var.set('普通模式')

    def setup_ui(self):
        """Create the input area, the option panel, the buttons and the output area."""
        # Input area
        input_frame = ttk.LabelFrame(self.root, text="输入文本")
        input_frame.grid(row=0, column=0, padx=10, pady=5, sticky='nsew')
        self.input_text = tk.Text(input_frame, height=10, width=50, wrap='word')
        self.input_text.pack(padx=5, pady=5, fill='both', expand=True)

        # Control panel
        control_frame = ttk.Frame(self.root)
        control_frame.grid(row=1, column=0, padx=10, pady=5, sticky='ew')

        # Conversion settings
        style_frame = ttk.LabelFrame(control_frame, text="转换设置")
        style_frame.pack(side=tk.LEFT, padx=5, pady=2)

        # Pinyin style
        ttk.Label(style_frame, text="拼音风格:").grid(row=0, column=0, sticky='w')
        self.style_var = tk.StringVar()
        self.style_combobox = ttk.Combobox(
            style_frame,
            textvariable=self.style_var,
            values=list(self.STYLE_NAMES),
            state='readonly',
        )
        self.style_combobox.grid(row=0, column=1, padx=2)

        # Output mode
        ttk.Label(style_frame, text="输出模式:").grid(row=1, column=0, sticky='w')
        self.output_mode_var = tk.StringVar()
        self.mode_combobox = ttk.Combobox(
            style_frame,
            textvariable=self.output_mode_var,
            values=list(self.OUTPUT_MODES),
            state='readonly',
        )
        self.mode_combobox.grid(row=1, column=1, padx=2)

        # Separator
        ttk.Label(style_frame, text="分隔符:").grid(row=2, column=0, sticky='w')
        self.separator_var = tk.StringVar()
        ttk.Entry(style_frame, textvariable=self.separator_var, width=3).grid(
            row=2, column=1, sticky='w')

        # Keep-punctuation option
        self.keep_punctuation_var = tk.IntVar()
        ttk.Checkbutton(
            style_frame,
            text="保留标点",
            variable=self.keep_punctuation_var,
        ).grid(row=3, column=0, columnspan=2, sticky='w')

        # Action buttons; every second button gets a little vertical padding.
        btn_frame = ttk.Frame(control_frame)
        btn_frame.pack(side=tk.RIGHT, padx=5)
        buttons = (
            ("转换", self.convert),
            ("清空", self.clear_text),
            ("复制结果", self.copy_result),
            ("导入文件", self.import_file),
            ("导出结果", self.export_result),
        )
        for index, (label, command) in enumerate(buttons):
            ttk.Button(btn_frame, text=label, command=command).pack(
                side=tk.TOP, fill=tk.X, pady=2 if index % 2 else 0)

        # Output area
        output_frame = ttk.LabelFrame(self.root, text="拼音结果")
        output_frame.grid(row=2, column=0, padx=10, pady=5, sticky='nsew')
        self.output_text = tk.Text(output_frame, height=10, width=50, wrap='word')
        self.output_text.pack(padx=5, pady=5, fill='both', expand=True)

        # Text tags used by the annotated mode
        self.pinyin_font = font.Font(family="Arial", size=10)
        self.hanzi_font = font.Font(family="SimSun", size=12)
        self.output_text.tag_configure("pinyin", font=self.pinyin_font, foreground="blue")
        self.output_text.tag_configure("hanzi", font=self.hanzi_font)

        # Let the input and output rows absorb extra space when resizing.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        self.root.rowconfigure(2, weight=1)

    @classmethod
    def _resolve_style(cls, label):
        """Return the pypinyin ``Style`` member selected by combobox *label*.

        Raises ``KeyError`` when *label* is not one of ``STYLE_NAMES``.
        """
        return getattr(Style, cls.STYLE_NAMES[label])

    @classmethod
    def _split_segments(cls, text):
        """Split *text* into ``(chunk, is_chinese)`` runs, in order.

        A chunk is "Chinese" when its characters lie in U+4E00..U+9FFF.
        An empty *text* yields an empty list.
        """
        return [
            (chunk, '\u4e00' <= chunk[0] <= '\u9fff')
            for chunk in cls._SEGMENT_RE.findall(text)
        ]

    @staticmethod
    def _join_pinyin(pinyin_list, separator):
        """Join the first reading of every entry of *pinyin_list* with *separator*.

        Joining (rather than appending the separator after each syllable)
        avoids a dangling separator at the end of the result.
        """
        return separator.join(word[0] for word in pinyin_list if word)

    def convert(self):
        """Convert the input text and show the result in the output area.

        Does nothing when the input is empty. Any failure is reported in an
        error dialog together with the traceback.
        """
        try:
            input_str = self.input_text.get("1.0", tk.END).strip()
            if not input_str:
                return

            style = self._resolve_style(self.style_var.get())
            separator = self.separator_var.get()
            keep_punctuation = bool(self.keep_punctuation_var.get())

            self.output_text.delete("1.0", tk.END)
            if self.output_mode_var.get() == '普通模式':
                self._render_plain(input_str, style, separator, keep_punctuation)
            else:
                self._render_annotated(input_str, style, keep_punctuation)
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            messagebox.showerror(
                "错误", f"转换失败: {str(e)}\n\n详细信息:\n{error_details}")

    def _render_plain(self, text, style, separator, keep_punctuation):
        """Insert the pinyin of *text*, joined by *separator*, into the output."""
        pinyin_list = pinyin(
            text,
            style=style,
            # Non-Chinese runs are passed through unchanged or dropped.
            errors=(lambda chars: chars) if keep_punctuation else 'ignore',
        )
        self.output_text.insert(tk.END, self._join_pinyin(pinyin_list, separator))

    def _render_annotated(self, text, style, keep_punctuation):
        """Insert a pinyin line followed by a hanzi line for each Chinese run.

        Non-Chinese runs are inserted on their own line, or skipped when
        *keep_punctuation* is false.
        """
        for segment_text, is_chinese in self._split_segments(text):
            if is_chinese:
                readings = pinyin(segment_text, style=style)
                py_line = " ".join(item[0] if item else '' for item in readings)
                hz_line = " ".join(segment_text)
                self.output_text.insert(tk.END, py_line + "\n", "pinyin")
                self.output_text.insert(tk.END, hz_line + "\n", "hanzi")
            elif keep_punctuation:
                self.output_text.insert(tk.END, segment_text + "\n")

    def clear_text(self):
        """Empty both the input and the output text areas."""
        self.input_text.delete("1.0", tk.END)
        self.output_text.delete("1.0", tk.END)

    def copy_result(self):
        """Copy the output text to the clipboard, if there is any."""
        result = self.output_text.get("1.0", tk.END).strip()
        if result:
            pyperclip.copy(result)
            messagebox.showinfo("成功", "已复制到剪贴板")

    def import_file(self):
        """Ask for a UTF-8 text file and load its content into the input area."""
        file_path = filedialog.askopenfilename(
            filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])
        if not file_path:
            return
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            self.input_text.delete("1.0", tk.END)
            self.input_text.insert(tk.END, content)
        except Exception as e:
            messagebox.showerror("错误", f"文件读取失败: {str(e)}")

    def export_result(self):
        """Ask for a destination and save the output text there as UTF-8."""
        result = self.output_text.get("1.0", tk.END).strip()
        if not result:
            return
        file_path = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])
        if not file_path:
            return
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(result)
            messagebox.showinfo("成功", "文件保存成功")
        except Exception as e:
            messagebox.showerror("错误", f"文件保存失败: {str(e)}")


if __name__ == "__main__":
    root = tk.Tk()
    app = PinyinConverter(root)
    root.mainloop()

改进版

在“拼音注音”模式下，拼音和汉字之间的对齐处理比较困难。

使用HTML5来实现汉字转拼音工具（特别是拼音注音功能）更加容易和灵活，浏览器中的渲染效果更好。

这个改进版方案，当用户选择"拼音注音"模式并点击转换后，程序会生成HTML文件并保存到临时目录。用户可以点击"在浏览器中查看"按钮来查看渲染效果。结果也会显示在文本区域中（以HTML源码形式）。

这种方法避免了在Tkinter窗口中嵌入复杂渲染引擎的问题，充分利用了系统浏览器的强大功能，同时保持了程序的简单性和稳定性。

运行界面如下：

源码如下：

import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from pypinyin import pinyin, Style
import pyperclip
import os
import re
import webbrowser
import tempfile


class PinyinConverter:
    """Tkinter window that converts Chinese text to pinyin.

    The plain mode emits pinyin syllables joined by a user-chosen separator.
    The annotated mode generates an HTML document using ``<ruby>`` markup,
    shows its source in the output area and stores it in a temporary file
    that can be opened in the system browser.
    """

    # Combobox label -> name of the pypinyin ``Style`` member it selects.
    # A single table keeps the combobox and the converter from drifting apart.
    STYLE_NAMES = {'带声调': 'TONE', '数字声调': 'TONE3', '无声调': 'NORMAL'}
    OUTPUT_MODES = ('普通模式', '拼音注音')

    # Either a run of characters in U+4E00..U+9FFF or a run of anything else.
    _SEGMENT_RE = re.compile(r'[\u4e00-\u9fff]+|[^\u4e00-\u9fff]+')

    # Fixed prologue of the generated HTML document (one entry per line).
    _HTML_HEAD = (
        '<!DOCTYPE html>',
        '<html lang="zh-CN">',
        '<head>',
        '<meta charset="UTF-8">',
        '<title>汉字拼音注音</title>',
        '<style>',
        'body {',
        '  font-family: "Microsoft YaHei", SimSun, sans-serif;',
        '  line-height: 2;',
        '  margin: 20px;',
        '  background-color: #f9f9f9;',
        '}',
        'ruby {',
        '  display: inline-flex;',
        '  flex-direction: column-reverse;',
        '  text-align: center;',
        '  margin: 0 2px;',
        '}',
        'rt {',
        '  font-size: 0.7em;',
        '  color: #0066cc;',
        '  line-height: 1.2;',
        '  text-align: center;',
        '  font-weight: normal;',
        '}',
        '.container {',
        '  background-color: white;',
        '  padding: 20px;',
        '  border-radius: 5px;',
        '  box-shadow: 0 2px 5px rgba(0,0,0,0.1);',
        '  max-width: 800px;',
        '  margin: 0 auto;',
        '}',
        '.non-chinese {',
        '  display: inline-block;',
        '}',
        '</style>',
        '</head>',
        '<body>',
        '<div class="container">',
    )
    # Fixed epilogue of the generated HTML document.
    _HTML_TAIL = ('</div>', '</body>', '</html>')

    def __init__(self, root):
        """Build the widgets inside *root* and apply the default settings."""
        self.root = root
        self.root.title("汉字转拼音工具")
        # Path of the most recently generated HTML file, or None.
        self.temp_html_file = None
        self.setup_ui()
        self.set_default_values()

    def set_default_values(self):
        """Reset every option widget to its initial value."""
        self.style_var.set('带声调')
        self.separator_var.set(' ')
        self.keep_punctuation_var.set(1)
        self.output_mode_var.set('普通模式')

    def setup_ui(self):
        """Create the input area, the option panel, the buttons and the output area."""
        # Input area
        input_frame = ttk.LabelFrame(self.root, text="输入文本")
        input_frame.grid(row=0, column=0, padx=10, pady=5, sticky='nsew')
        self.input_text = tk.Text(input_frame, height=10, width=50, wrap='word')
        self.input_text.pack(padx=5, pady=5, fill='both', expand=True)

        # Control panel
        control_frame = ttk.Frame(self.root)
        control_frame.grid(row=1, column=0, padx=10, pady=5, sticky='ew')

        # Conversion settings
        style_frame = ttk.LabelFrame(control_frame, text="转换设置")
        style_frame.pack(side=tk.LEFT, padx=5, pady=2)

        # Pinyin style
        ttk.Label(style_frame, text="拼音风格:").grid(row=0, column=0, sticky='w')
        self.style_var = tk.StringVar()
        self.style_combobox = ttk.Combobox(
            style_frame,
            textvariable=self.style_var,
            values=list(self.STYLE_NAMES),
            state='readonly',
        )
        self.style_combobox.grid(row=0, column=1, padx=2)

        # Output mode
        ttk.Label(style_frame, text="输出模式:").grid(row=1, column=0, sticky='w')
        self.output_mode_var = tk.StringVar()
        self.mode_combobox = ttk.Combobox(
            style_frame,
            textvariable=self.output_mode_var,
            values=list(self.OUTPUT_MODES),
            state='readonly',
        )
        self.mode_combobox.grid(row=1, column=1, padx=2)

        # Separator
        ttk.Label(style_frame, text="分隔符:").grid(row=2, column=0, sticky='w')
        self.separator_var = tk.StringVar()
        ttk.Entry(style_frame, textvariable=self.separator_var, width=3).grid(
            row=2, column=1, sticky='w')

        # Keep-punctuation option
        self.keep_punctuation_var = tk.IntVar()
        ttk.Checkbutton(
            style_frame,
            text="保留标点",
            variable=self.keep_punctuation_var,
        ).grid(row=3, column=0, columnspan=2, sticky='w')

        # Action buttons; every second button gets a little vertical padding.
        btn_frame = ttk.Frame(control_frame)
        btn_frame.pack(side=tk.RIGHT, padx=5)
        buttons = (
            ("转换", self.convert),
            ("清空", self.clear_text),
            ("复制结果", self.copy_result),
            ("导入文件", self.import_file),
            ("导出结果", self.export_result),
            ("在浏览器中查看", self.view_in_browser),
        )
        for index, (label, command) in enumerate(buttons):
            ttk.Button(btn_frame, text=label, command=command).pack(
                side=tk.TOP, fill=tk.X, pady=2 if index % 2 else 0)

        # Output area
        output_frame = ttk.LabelFrame(self.root, text="拼音结果")
        output_frame.grid(row=2, column=0, padx=10, pady=5, sticky='nsew')
        self.output_text = tk.Text(output_frame, height=10, width=50, wrap='word')
        self.output_text.pack(padx=5, pady=5, fill='both', expand=True)

        # Let the input and output rows absorb extra space when resizing.
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        self.root.rowconfigure(2, weight=1)

    @classmethod
    def _resolve_style(cls, label):
        """Return the pypinyin ``Style`` member selected by combobox *label*.

        Raises ``KeyError`` when *label* is not one of ``STYLE_NAMES``.
        """
        return getattr(Style, cls.STYLE_NAMES[label])

    @classmethod
    def _split_segments(cls, text):
        """Split *text* into ``(chunk, is_chinese)`` runs, in order.

        A chunk is "Chinese" when its characters lie in U+4E00..U+9FFF.
        An empty *text* yields an empty list.
        """
        return [
            (chunk, '\u4e00' <= chunk[0] <= '\u9fff')
            for chunk in cls._SEGMENT_RE.findall(text)
        ]

    @staticmethod
    def _join_pinyin(pinyin_list, separator):
        """Join the first reading of every entry of *pinyin_list* with *separator*.

        Joining (rather than appending the separator after each syllable)
        avoids a dangling separator at the end of the result.
        """
        return separator.join(word[0] for word in pinyin_list if word)

    def _remove_temp_html(self):
        """Delete the current temporary HTML file, if any, and forget its path."""
        if self.temp_html_file:
            try:
                os.unlink(self.temp_html_file)
            except OSError:
                # Best-effort cleanup: the file may already be gone or locked.
                pass
            self.temp_html_file = None

    def convert(self):
        """Convert the input text and show the result in the output area.

        In annotated mode the generated HTML is additionally written to a
        fresh temporary file (replacing the previous one). Does nothing when
        the input is empty. Any failure is reported in an error dialog
        together with the traceback.
        """
        try:
            input_str = self.input_text.get("1.0", tk.END).strip()
            if not input_str:
                return

            style = self._resolve_style(self.style_var.get())
            separator = self.separator_var.get()
            keep_punctuation = bool(self.keep_punctuation_var.get())

            self.output_text.delete("1.0", tk.END)
            if self.output_mode_var.get() == '普通模式':
                pinyin_list = pinyin(
                    input_str,
                    style=style,
                    # Non-Chinese runs are passed through unchanged or dropped.
                    errors=(lambda chars: chars) if keep_punctuation else 'ignore',
                )
                self.output_text.insert(
                    tk.END, self._join_pinyin(pinyin_list, separator))
            else:
                html_content = self.generate_html_with_pinyin(
                    input_str, style, keep_punctuation=keep_punctuation)
                # Show the HTML source in the text area.
                self.output_text.insert(tk.END, html_content)

                # Replace the previous temporary file with the new document.
                self._remove_temp_html()
                fd, self.temp_html_file = tempfile.mkstemp(suffix='.html')
                with os.fdopen(fd, 'w', encoding='utf-8') as f:
                    f.write(html_content)

                messagebox.showinfo(
                    "提示", "HTML已生成，点击“在浏览器中查看”按钮可以打开浏览器查看效果")
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            messagebox.showerror(
                "错误", f"转换失败: {str(e)}\n\n详细信息:\n{error_details}")

    def generate_html_with_pinyin(self, text, style, handle_polyphonic=False,
                                  keep_punctuation=True):
        """Return an HTML document annotating *text* with pinyin.

        Each Chinese character becomes a ``<ruby>`` element whose ``<rt>``
        holds its first pinyin reading; non-Chinese runs are wrapped in a
        ``<span class="non-chinese">``. All user text is HTML-escaped.

        Args:
            text: The text to annotate.
            style: The pypinyin style passed to ``pinyin()``.
            handle_polyphonic: Accepted for backward compatibility; not used.
            keep_punctuation: When false, non-Chinese runs are omitted.

        Returns:
            The document as a single string with lines joined by ``"\\n"``.
        """
        import html

        html_content = list(self._HTML_HEAD)
        for segment_text, is_chinese in self._split_segments(text):
            if is_chinese:
                py_results = pinyin(segment_text, style=style)
                for char, py in zip(segment_text, py_results):
                    py_text = py[0] if py else ''
                    html_content.append(
                        f'<ruby>{html.escape(char)}'
                        f'<rt>{html.escape(py_text)}</rt></ruby>')
            elif keep_punctuation:
                # Escape so that "<", ">" and "&" in the input stay literal text.
                html_content.append(
                    f'<span class="non-chinese">{html.escape(segment_text)}</span>')
        html_content.extend(self._HTML_TAIL)
        return '\n'.join(html_content)

    def view_in_browser(self):
        """Open the generated HTML file in the default web browser.

        When no file exists yet and the annotated mode is selected, a
        conversion is run first; in plain mode the user is told to switch
        modes instead.
        """
        from pathlib import Path

        if not (self.temp_html_file and os.path.exists(self.temp_html_file)):
            if self.output_mode_var.get() == '拼音注音':
                self.convert()  # regenerate the HTML
            else:
                messagebox.showinfo("提示", "请先切换到拼音注音模式并执行转换")
                return

        if self.temp_html_file and os.path.exists(self.temp_html_file):
            # as_uri() builds a well-formed file:// URL on every platform,
            # including Windows drive-letter paths.
            webbrowser.open(Path(self.temp_html_file).resolve().as_uri())
        else:
            messagebox.showerror("错误", "HTML文件不存在或无法访问")

    def clear_text(self):
        """Empty both text areas and delete the temporary HTML file."""
        self.input_text.delete("1.0", tk.END)
        self.output_text.delete("1.0", tk.END)
        self._remove_temp_html()

    def copy_result(self):
        """Copy the output text to the clipboard, if there is any."""
        result = self.output_text.get("1.0", tk.END).strip()
        if result:
            pyperclip.copy(result)
            messagebox.showinfo("成功", "已复制到剪贴板")

    def import_file(self):
        """Ask for a UTF-8 text file and load its content into the input area."""
        file_path = filedialog.askopenfilename(
            filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")])
        if not file_path:
            return
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            self.input_text.delete("1.0", tk.END)
            self.input_text.insert(tk.END, content)
        except Exception as e:
            messagebox.showerror("错误", f"文件读取失败: {str(e)}")

    def export_result(self):
        """Ask for a destination and save the output text there as UTF-8.

        The suggested extension is ``.html`` in annotated mode and ``.txt``
        otherwise.
        """
        result = self.output_text.get("1.0", tk.END).strip()
        if not result:
            return

        if self.output_mode_var.get() == '拼音注音':
            default_ext = ".html"
            filetypes = [("HTML文件", "*.html"), ("所有文件", "*.*")]
        else:
            default_ext = ".txt"
            filetypes = [("文本文件", "*.txt"), ("所有文件", "*.*")]

        file_path = filedialog.asksaveasfilename(
            defaultextension=default_ext, filetypes=filetypes)
        if not file_path:
            return
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(result)
            messagebox.showinfo("成功", "文件保存成功")
        except Exception as e:
            messagebox.showerror("错误", f"文件保存失败: {str(e)}")


if __name__ == "__main__":
    root = tk.Tk()
    app = PinyinConverter(root)
    root.mainloop()

特别说明，需要将拼音添加到汉字上面时，用python实现比HTML5+JavaScript实现繁琐。在下一篇博文中用HTML5+JavaScript实现汉字转拼音工具。

查看全文

http://www.xdnf.cn/news/566065.html