#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Project :redbook
@File :visualize.py
@IDE :PyCharm
@Author :rengengchen
@Time :2024/4/17 14:44
"""
import os
import tkinter as tk
import tkinter.font as tkFont
from tkinter import filedialog
from urllib.parse import urlparse

from loguru import logger

from config import HEAD, RESULT_PATH, modify_result_path, modify_encoding, modify_filter_words
from scraper import read_comment


def _parse_note_ids(raw_links):
    """Return the note id of every non-blank line in *raw_links*.

    The note id is the last path segment of the URL on that line. Lines that
    yield no id (for example a bare domain) are skipped with a warning.
    """
    note_ids = []
    for line in raw_links.split('\n'):
        link = line.strip()
        if not link:
            continue
        # rstrip('/') so a link pasted with a trailing slash still yields its
        # id instead of an empty string.
        note_id = urlparse(link).path.rstrip('/').split('/')[-1]
        if not note_id:
            logger.warning(f'skip link without note id: {link}')
            continue
        note_ids.append(note_id)
    return note_ids


# Function to handle button click
def run():
    """Collect the form values, write the CSV header and scrape every note.

    Reads the cookie, interval bounds, encoding, filter words and note links
    from the module-level widgets, pushes them into ``HEAD`` / the ``config``
    setters, then calls ``read_comment`` once per note id. The outcome is
    shown in the ``result`` label; any exception is re-raised after being
    displayed.
    """
    confirm_button.config(text='采集中')
    confirm_button['state'] = tk.DISABLED
    # The scrape runs inside this callback and blocks the Tk event loop, so
    # flush pending redraws now or the "busy" button state is never painted.
    confirm_button.update_idletasks()
    try:
        cookie = cookie_text.get("1.0", tk.END)
        HEAD['Cookie'] = cookie.strip()

        # Validate the interval bounds once, before comments.csv is
        # overwritten; int() tolerates the trailing newline Text.get() adds.
        s1 = int(sec1.get("1.0", tk.END))
        s2 = int(sec2.get("1.0", tk.END))

        encoding = encoding_text.get("1.0", tk.END)
        modify_encoding(encoding.strip())

        # foc_in() clears the placeholder so it is not read as user input.
        keywords_text.foc_in()
        filter_words = keywords_text.get("1.0", tk.END)
        modify_filter_words(filter_words.strip().split('\t'))

        note_link_text.foc_in()
        links = note_link_text.get("1.0", tk.END)
        note_ids = _parse_note_ids(links)

        # NOTE(review): RESULT_PATH was bound by `from config import ...` at
        # import time. If modify_result_path() rebinds the name inside config,
        # this module still sees the default directory - confirm against
        # config.py and against where scraper.read_comment writes.
        with open(f'{RESULT_PATH}/comments.csv', mode='w', encoding='utf8', errors='ignore') as f:
            f.write(','.join(
                ['nickname', 'content', 'ip_location', 'level', 'user_id', 'status', 'liked', 'create_time',
                 'note_id', 'id']))
            f.write('\n')

        # The header file is closed before scraping starts.
        for note_id in note_ids:
            logger.info(f'scrape {note_id}')
            read_comment(note_id, s1=s1, s2=s2)
    except Exception as e:
        result.config(text=f'\U0001F605fail!!!\U0001F605\n{e}', fg='green')
        raise
    else:
        result.config(text='\U0001F618success!!!\U0001F618', fg='red')
        logger.info('over')
    finally:
        confirm_button.config(text='开始采集评论')
        confirm_button['state'] = tk.NORMAL


def select_path():
    """Ask for an output directory and, if one is chosen, register and display it."""
    dir_path = filedialog.askdirectory()
    if dir_path:
        modify_result_path(dir_path)
        label_file.config(text="评论输出到指定目录下的comments.csv, 会覆盖已存在文件\n已指定存储目录: " + dir_path)


class PlaceholderText(tk.Text):
    """A ``tk.Text`` that shows a coloured hint while it holds no user input.

    The placeholder is inserted at construction, removed when the widget gains
    focus and re-inserted when it loses focus while blank.
    """

    def __init__(self, master=None, placeholder="请输入文本", color='grey', **kwargs):
        """Create the widget and show *placeholder* in *color*.

        Remaining keyword arguments are forwarded to ``tk.Text``.
        """
        super().__init__(master, **kwargs)

        self.placeholder = placeholder
        self.placeholder_color = color
        self.default_fg_color = self['fg']

        self.insert("1.0", self.placeholder)
        self['fg'] = self.placeholder_color

        # The handlers must be bound to the focus events; an empty event
        # sequence never fires them.
        self.bind("<FocusIn>", self.foc_in)
        self.bind("<FocusOut>", self.foc_out)

    def foc_in(self, event=None):
        """Remove the placeholder, if it is what is currently displayed."""
        # Both the text and the colour are checked, so user input that happens
        # to equal the placeholder text is left alone.
        if self.get("1.0", "end-1c") == self.placeholder and self['fg'] == self.placeholder_color:
            self.delete("1.0", "end")
            self['fg'] = self.default_fg_color

    def foc_out(self, event=None):
        """Show the placeholder again when the widget holds only whitespace."""
        if not self.get("1.0", "end-1c").strip():
            self.insert("1.0", self.placeholder)
            self['fg'] = self.placeholder_color


# Create the main window
root = tk.Tk()
root.title('小红书评论采集器')
root.minsize(width=850, height=650)

# Top frame for copyright
top_frame = tk.Frame(root)
top_frame.pack(fill=tk.X, padx=10, pady=10)
copyright = tk.Label(top_frame, text='老板好', font=('微软雅黑', 30), fg='red')
copyright.pack(side=tk.TOP)

# Define a font
entry_font = tkFont.Font(family='微软雅黑', size=14)

# Middle frame for inputs
middle_frame = tk.Frame(root)
middle_frame.pack(fill=tk.X, padx=10, pady=5)

# Cookie label and text box
tk.Label(middle_frame, text='Cookie:', font=('微软雅黑', 14)).grid(row=0, column=0, sticky='w')
cookie_text = tk.Text(middle_frame, width=72, height=1, font=entry_font)
cookie_text.grid(row=0, column=1, padx=30, pady=5, columnspan=2, sticky='ew')  # fill horizontally

# Note link input with placeholder and scrollbar
tk.Label(middle_frame, text='笔记链接, 多个笔记链接请换行输入:', font=('微软雅黑', 14)).grid(row=1, column=0,
                                                                                   sticky='w', pady=5)
note_link_text = PlaceholderText(middle_frame,
                                 placeholder="例:\n"
                                             "https://www.xiaohongshu.com/explore/66174eea0000f00a1b00c6c6\n"
                                             "https://www.xiaohongshu.com/explore/4517423a0000f00a1b00c6c6",
                                 font=entry_font, width=110, height=7)
note_link_text.grid(row=2, column=0, sticky='ew', columnspan=2)
scroll = tk.Scrollbar(middle_frame, command=note_link_text.yview)
scroll.grid(row=2, column=2, sticky='ns')
# Let the note-link text box drive its scrollbar.
note_link_text.configure(yscrollcommand=scroll.set)

# Fonts shared by the captions and hint labels below.
_CAPTION_FONT = ('微软雅黑', 14)
_HINT_FONT = ('微软雅黑', 10)


def _caption(parent, text, **grid_options):
    """Create a caption label in *parent* and place it with *grid_options*."""
    tk.Label(parent, text=text, font=_CAPTION_FONT).grid(**grid_options)


# Stop words
_caption(middle_frame, '过滤停用词:', row=3, column=0, sticky='w', pady=5)
keywords_text = PlaceholderText(
    middle_frame,
    placeholder="过滤词之间用Tab键(制表键)分隔, 例: 老板\t送我\tStellar Blade豪华版\t爽一下",
    font=entry_font,
    width=3,
    height=1,
)
keywords_text.grid(row=3, column=1, sticky='ew', columnspan=2)

# Give column 1 the grid weight so it takes up spare horizontal space.
middle_frame.grid_columnconfigure(1, weight=1)

# Frame holding the crawl-interval and output-encoding inputs
middle_frame1 = tk.Frame(root)
middle_frame1.pack(fill=tk.X, pady=5)

# Interval row: lower bound, separator, upper bound
_caption(middle_frame1, '每次爬取间隔时间范围, 单位: 秒 (在区间内随机): ', row=3, column=0, sticky='w', pady=5)
sec1 = tk.Text(middle_frame1, width=3, height=1, font=entry_font)
sec1.insert('1.0', '1')
sec1.grid(row=3, column=1, sticky='w')
_caption(middle_frame1, 's', row=3, column=2, sticky='w')
_caption(middle_frame1, '———', row=3, column=3, sticky='ew', columnspan=2)
sec2 = tk.Text(middle_frame1, width=3, height=1, font=entry_font)
sec2.insert('1.0', '3')
sec2.grid(row=3, column=5, sticky='w')
_caption(middle_frame1, 's', row=3, column=6, sticky='w')

# Encoding row, pre-filled with GBK
_caption(middle_frame1, '存储结果编码: ', row=4, column=0, sticky='e', pady=5)
encoding_text = tk.Text(middle_frame1, width=8, height=1, font=entry_font)
encoding_text.insert('1.0', 'GBK')
encoding_text.grid(row=4, column=1, sticky='w', columnspan=3)

# Frame holding the output-directory chooser and its hint label
middle_frame2 = tk.Frame(root)
middle_frame2.pack(fill=tk.X, padx=10, pady=5)

# Directory and action buttons
upload_button = tk.Button(
    middle_frame2,
    text="选择存储目录",
    command=select_path,
    padx=30,
    pady=5,
)
upload_button.grid(row=1, column=0, padx=(100, 20), pady=5)
label_file = tk.Label(
    middle_frame2,
    text='评论输出到指定目录下的comments.csv, 会覆盖已存在文件\n'
         f'默认输出目录: {RESULT_PATH}',
    font=_HINT_FONT,
    fg='blue',
)
label_file.grid(row=1, column=1, padx=(5, 10), pady=5, sticky='w')

# Bottom frame: start button and result message
bottom_frame = tk.Frame(root)
bottom_frame.pack(fill=tk.X, padx=10, pady=5)
confirm_button = tk.Button(
    bottom_frame,
    text='开始采集评论',
    command=run,
    padx=100,
    pady=10,
)
confirm_button.pack(side=tk.TOP)
result = tk.Label(bottom_frame, font=_HINT_FONT)
result.pack(side=tk.TOP)


def main():
    """Register the file log sink under RESULT_PATH/logs and run the Tk main loop."""
    # The sink is added with rotation="1 hour", retention=12, compression="zip".
    log_file_pattern = os.path.join(RESULT_PATH, 'logs', '{time:YYYY-MM-DD HH}.log')
    logger.add(log_file_pattern, rotation="1 hour", retention=12, compression="zip")
    root.mainloop()


if __name__ == '__main__':
    main()