PhoneWork/bot/handler.py
Yuyao Huang (Sam) 2a8f745b3d feat(router): 添加用户追踪和节点通知功能
在ROUTER_MODE启用时跟踪用户消息,并在节点注册/注销时通知相关用户。新增_known_users集合记录活跃用户,重构通知逻辑以支持所有已知用户或特定服务用户的通知。
2026-03-29 18:11:00 +08:00

254 lines
8.8 KiB
Python

"""Feishu event handler using lark-oapi long-connection (WebSocket) mode."""
from __future__ import annotations
import asyncio
import json
import logging
import threading
import time
from typing import Any, Dict
import lark_oapi as lark
from lark_oapi.api.im.v1 import P2ImMessageReceiveV1
from bot.commands import handle_command
from bot.feishu import send_text
from config import FEISHU_APP_ID, FEISHU_APP_SECRET, is_user_allowed
from orchestrator.agent import agent
from orchestrator.tools import set_current_chat
logger = logging.getLogger(__name__)
# Event loop handed to start_websocket_client(); _handle_message() schedules
# _process_message() coroutines onto it from the WebSocket thread.
_main_loop: asyncio.AbstractEventLoop | None = None
# Connection flag maintained by the reconnect loop in start_websocket_client().
_ws_connected: bool = False
# time.time() of the most recent event seen by _handle_message() (0.0 = none yet).
_last_message_time: float = 0.0
# Incremented once per connection attempt made by the reconnect loop.
_reconnect_count: int = 0
# Deduplication: drop Feishu re-deliveries by (user_id, content) within a short window.
# Feishu retries on network hiccups within ~60s using the same payload.
# We use a 10s window: identical content from the same user within 10s is a re-delivery,
# not a deliberate repeat (user intentional repeats arrive after the bot has already replied).
# Timestamps are time.monotonic() readings, so they are only comparable to each other
# and are immune to wall-clock adjustments.
_recent_messages: dict[tuple[str, str], float] = {}  # key: (user_id, content) → timestamp
_DEDUP_WINDOW = 10.0  # seconds


def _is_duplicate(user_id: str, content: str) -> bool:
    """Return True if this (user, content) pair arrived within the dedup window.

    Entries older than ``_DEDUP_WINDOW`` seconds are purged on every call.
    A first sighting is recorded and reported as not-a-duplicate; a repeat
    inside the window returns True without refreshing the stored timestamp,
    so the window is anchored at the first delivery.

    Args:
        user_id: Identifier of the sender (open_id, or chat_id as fallback).
        content: Normalised message text.

    Returns:
        True when the same pair was already seen within the window.
    """
    # Monotonic clock: a wall-clock step backwards must not keep stale
    # entries alive and cause genuine repeats to be dropped.
    now = time.monotonic()

    stale_keys = [k for k, ts in _recent_messages.items() if now - ts > _DEDUP_WINDOW]
    for stale_key in stale_keys:
        del _recent_messages[stale_key]

    key = (user_id, content)
    if key in _recent_messages:
        return True

    _recent_messages[key] = now
    return False
def get_ws_status() -> dict[str, Any]:
    """Snapshot the module-level WebSocket bookkeeping values.

    Returns:
        A dict with the keys ``connected``, ``last_message_time`` and
        ``reconnect_count``, mirroring the corresponding module globals.
    """
    status: dict[str, Any] = {}
    status["connected"] = _ws_connected
    status["last_message_time"] = _last_message_time
    status["reconnect_count"] = _reconnect_count
    return status
def _handle_message(data: P2ImMessageReceiveV1) -> None:
    """Handle one received-message event and hand it to the main event loop.

    Records the arrival time, ignores events without a message and messages
    that are not of type ``text``, strips ``@mention`` tokens from the text,
    drops duplicates, and finally schedules ``_process_message`` on
    ``_main_loop``. Any exception is logged and swallowed so it does not
    propagate to the caller.

    Args:
        data: Event object passed in by the lark SDK dispatcher.
    """
    global _last_message_time
    import re

    _last_message_time = time.time()
    try:
        evt = data.event
        if evt is None:
            logger.warning("Received event with no data")
            return

        msg, sender = evt.message, evt.sender
        if msg is None:
            logger.warning("Received event with no message")
            return

        logger.debug(
            "event type=%r chat_type=%r content=%r",
            getattr(msg, "message_type", None),
            getattr(msg, "chat_type", None),
            (getattr(msg, "content", None) or "")[:100],
        )

        if msg.message_type != "text":
            logger.info("Skipping non-text message_type=%r", msg.message_type)
            return

        chat_id: str = msg.chat_id or ""

        # The message content is a JSON document; the text lives under "text".
        payload = json.loads(msg.content or "{}")
        stripped = payload.get("text", "").strip()
        # Remove every "@token" (and the whitespace following it).
        text: str = re.sub(r"@\S+\s*", "", stripped).strip()

        sender_id = sender.sender_id if sender else None
        open_id: str = (sender_id.open_id or "") if sender_id else ""

        if not text:
            logger.info("Empty text after stripping, ignoring")
            return

        # Fall back to the chat id when the sender's open_id is unavailable.
        user_id = open_id or chat_id
        if _is_duplicate(user_id, text):
            logger.info("Dropping duplicate delivery: user=...%s text=%r", user_id[-8:], text[:60])
            return

        logger.info("✉ ...%s%r", open_id[-8:], text[:80])

        if _main_loop is None:
            logger.error("Main event loop not set; cannot process message")
            return

        # This callback runs outside the main loop's thread, hence the
        # thread-safe scheduling call.
        coro = _process_message(user_id, chat_id, text)
        asyncio.run_coroutine_threadsafe(coro, _main_loop)
    except Exception:
        logger.exception("Error in _handle_message")
async def _process_message(user_id: str, chat_id: str, text: str) -> None:
    """Process message: check allowlist, then commands, then route to node or local agent.

    Steps, in order:
      1. Record ``chat_id`` via ``set_current_chat``.
      2. Reject users for whom ``is_user_allowed`` is false (with a reply).
      3. In router mode, register the user with the node registry.
      4. Let ``handle_command`` try the text; a non-None result means the
         text was a command (an empty string means "handled, nothing to say").
      5. Otherwise either route to a node (router mode) or run the local agent.

    All exceptions are logged and swallowed; nothing is raised to the caller.

    Args:
        user_id: Sender identifier used for authorization, routing and agent state.
        chat_id: Chat that replies are sent to.
        text: Message text to process.
    """
    try:
        set_current_chat(chat_id)

        if not is_user_allowed(user_id):
            logger.warning("Rejected message from unauthorized user: ...%s", user_id[-8:])
            await send_text(chat_id, "chat_id", "Sorry, you are not authorized to use this bot.")
            return

        # Read once per message so tracking and routing see the same mode.
        # NOTE(review): imported lazily in the original as well — presumably
        # to pick up a value set at startup or to avoid an import cycle; confirm.
        from config import ROUTER_MODE

        if ROUTER_MODE:
            from router.nodes import get_node_registry

            get_node_registry().track_user(user_id)

        reply = await handle_command(user_id, text)
        if reply is not None:
            if reply:
                await send_text(chat_id, "chat_id", reply)
            return

        if not ROUTER_MODE:
            reply = await agent.run(user_id, text)
            if reply:
                await send_text(chat_id, "chat_id", reply)
            return

        from router.nodes import get_node_registry
        from router.routing_agent import route
        from router.rpc import forward

        node_id, reason = await route(user_id, chat_id, text)
        if node_id is None:
            await send_text(chat_id, "chat_id", f"No host available: {reason}")
            return

        if node_id == "meta":
            # "meta" is answered here with a node listing instead of being forwarded.
            registry = get_node_registry()
            nodes = registry.list_nodes()
            if not nodes:
                await send_text(chat_id, "chat_id", "No nodes connected.")
                return

            # The user's active node does not change while we format the
            # list, so look it up once rather than once per node.
            active_node = registry.get_active_node(user_id)
            active_id = active_node.node_id if active_node else None

            lines = ["Connected Nodes:"]
            for n in nodes:
                marker = "" if n.get("node_id") == active_id else " "
                lines.append(f"{marker}{n.get('display_name', 'unknown')} sessions={n.get('sessions', 0)} {n.get('status', 'unknown')}")
            lines.append("\nUse \"/node <name>\" to switch active node.")
            await send_text(chat_id, "chat_id", "\n".join(lines))
            return

        try:
            reply = await forward(node_id, user_id, chat_id, text)
            if reply:
                await send_text(chat_id, "chat_id", reply)
        except Exception as e:
            logger.exception("Failed to forward to node %s", node_id)
            await send_text(chat_id, "chat_id", f"Error communicating with node: {e}")
    except Exception:
        logger.exception("Error processing message for user %s", user_id)
def _handle_any(data: lark.CustomizedEvent) -> None:
    """Log the serialized form of a customized event, truncated to 500 characters.

    Nothing is logged when serialization yields an empty/falsy result.

    Args:
        data: Customized event object delivered by the lark SDK.
    """
    payload = lark.JSON.marshal(data)
    if not payload:
        return
    logger.info("RAW CustomizedEvent: %s", payload[:500])
def build_event_handler() -> lark.EventDispatcherHandler:
    """Build the SDK event dispatcher with this module's callbacks attached.

    ``_handle_message`` is registered for the v2 message-receive event and
    ``_handle_any`` for the customized ``im.message.receive_v1`` event.

    Returns:
        The built ``lark.EventDispatcherHandler``.
    """
    # NOTE(review): the two empty strings are presumably the encrypt key and
    # verification token, unused in long-connection mode — confirm against the SDK.
    builder = lark.EventDispatcherHandler.builder("", "")
    builder = builder.register_p2_im_message_receive_v1(_handle_message)
    builder = builder.register_p1_customized_event("im.message.receive_v1", _handle_any)
    return builder.build()
def start_websocket_client(loop: asyncio.AbstractEventLoop) -> None:
    """
    Start the lark-oapi WebSocket long-connection client in a background thread.

    Must be called after the asyncio event loop is running.

    The given loop is stored in ``_main_loop`` so message callbacks can
    schedule coroutines on it. The client runs in a daemon thread named
    ``feishu-ws`` inside a retry loop with exponential backoff (1s doubling
    up to 60s). The backoff is reset after a session that stayed up for at
    least the maximum backoff, so one flaky period does not penalise every
    later reconnect.

    Args:
        loop: The application's running event loop.
    """
    global _main_loop
    _main_loop = loop

    def _run_with_reconnect() -> None:
        """Thread target: (re)create and run the client until the process exits."""
        global _ws_connected, _reconnect_count
        initial_backoff = 1.0
        max_backoff = 60.0
        backoff = initial_backoff

        # lark_oapi.ws.client captures the event loop at module import time.
        # In standalone mode uvicorn already owns the main loop, so we create
        # a fresh loop for this thread and redirect the lark module to use it.
        thread_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(thread_loop)
        import lark_oapi.ws.client as _lark_ws_client
        _lark_ws_client.loop = thread_loop

        while True:
            session_started = time.monotonic()
            try:
                _ws_connected = False
                event_handler = build_event_handler()
                ws_client = lark.ws.Client(
                    FEISHU_APP_ID,
                    FEISHU_APP_SECRET,
                    event_handler=event_handler,
                    log_level=lark.LogLevel.INFO,
                )
                logger.info("Starting Feishu long-connection client...")
                # Set optimistically just before start(); there is no hook
                # here to learn when the connection is actually established.
                _ws_connected = True
                _reconnect_count += 1
                ws_client.start()
                logger.warning("WebSocket disconnected, will reconnect...")
            except Exception as e:
                # logger.exception keeps the traceback for diagnosing failures.
                logger.exception("WebSocket error: %s", e)
            finally:
                _ws_connected = False

            # A session that outlived the maximum backoff is considered
            # healthy, so start the backoff sequence over.
            if time.monotonic() - session_started >= max_backoff:
                backoff = initial_backoff

            logger.info("Reconnecting in %.1fs (attempt %d)...", backoff, _reconnect_count)
            time.sleep(backoff)
            backoff = min(backoff * 2, max_backoff)

    thread = threading.Thread(target=_run_with_reconnect, daemon=True, name="feishu-ws")
    thread.start()
    logger.info("Feishu WebSocket thread started")