From 8dab229aafb4d808d0c5c03238f44b4e8f627641 Mon Sep 17 00:00:00 2001
From: "Yuyao Huang (Sam)" <huangyuyao@outlook.com>
Date: Sun, 29 Mar 2026 04:24:27 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=B5=8B=E8=AF=95?=
 =?UTF-8?q?=E6=A1=86=E6=9E=B6=E5=8F=8A=E5=8A=9F=E8=83=BD=E6=B5=8B=E8=AF=95?=
 =?UTF-8?q?=E7=94=A8=E4=BE=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

test: 实现BDD测试框架及功能测试
docs: 添加测试配置文件及文档
refactor: 重构命令处理逻辑以支持测试
---
 bot/commands.py                              |  22 +++
 conftest.py                                  |  16 ++
 pyproject.toml                               |   3 +
 requirements-dev.txt                         |   5 +
 tests/conftest.py                            | 167 +++++++++++++++++
 tests/features/agent/passthrough.feature     |  19 ++
 tests/features/agent/routing.feature         |  35 ++++
 tests/features/commands/close.feature        |  38 ++++
 tests/features/commands/direct_smart.feature |  27 +++
 tests/features/commands/help.feature         |  25 +++
 tests/features/commands/new.feature          |  37 ++++
 tests/features/commands/nodes.feature        |  13 ++
 tests/features/commands/remind.feature       |  33 ++++
 tests/features/commands/shell.feature        |  22 +++
 tests/features/commands/status.feature       |  40 ++++
 tests/features/commands/switch.feature       |  30 +++
 tests/features/commands/tasks.feature        |  27 +++
 tests/keyring_test.yaml                      |  10 +
 tests/step_defs/__init__.py                  |   0
 tests/step_defs/common_steps.py              | 183 +++++++++++++++++++
 tests/step_defs/test_agent.py                |  76 ++++++++
 tests/step_defs/test_commands.py             |  78 ++++++++
 22 files changed, 906 insertions(+)
 create mode 100644 conftest.py
 create mode 100644 pyproject.toml
 create mode 100644 requirements-dev.txt
 create mode 100644 tests/conftest.py
 create mode 100644 tests/features/agent/passthrough.feature
 create mode 100644 tests/features/agent/routing.feature
 create mode 100644 tests/features/commands/close.feature
 create mode 100644 tests/features/commands/direct_smart.feature
 create mode 100644 tests/features/commands/help.feature
 create mode 100644 tests/features/commands/new.feature
 create mode 100644 tests/features/commands/nodes.feature
 create mode 100644 tests/features/commands/remind.feature
 create mode 100644 tests/features/commands/shell.feature
 create mode 100644 tests/features/commands/status.feature
 create mode 100644 tests/features/commands/switch.feature
 create mode 100644 tests/features/commands/tasks.feature
 create mode 100644 tests/keyring_test.yaml
 create mode 100644 tests/step_defs/__init__.py
 create mode 100644 tests/step_defs/common_steps.py
 create mode 100644 tests/step_defs/test_agent.py
 create mode 100644 tests/step_defs/test_commands.py

diff --git a/bot/commands.py b/bot/commands.py
index eaf8e50..160539d 100644
--- a/bot/commands.py
+++ b/bot/commands.py
@@ -161,6 +161,28 @@ async def _cmd_status(user_id: str) -> str:
 
 async def _cmd_close(user_id: str, args: str) -> str:
     """Close a session."""
+    # If a specific conv_id is given by name (not a number), resolve it directly.
+    if args:
+        try:
+            int(args)
+            by_number = True
+        except ValueError:
+            by_number = False
+
+        if not by_number:
+            # Explicit conv_id given — look it up directly (may belong to another user).
+            conv_id = args.strip()
+            try:
+                success = await manager.close(conv_id, user_id=user_id)
+                if success:
+                    if agent.get_active_conv(user_id) == conv_id:
+                        agent._active_conv[user_id] = None
+                    return f"✓ Closed session `{conv_id}`"
+                else:
+                    return f"Session `{conv_id}` not found."
+            except PermissionError as e:
+                return str(e)
+
     sessions = manager.list_sessions(user_id=user_id)
     if not sessions:
         return "No sessions to close."
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000..749278b
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,16 @@
+"""
+Root conftest — runs before pytest collects any test files or imports any
+production modules. Patches config._CONFIG_PATH to point at the test keyring
+so that `import config` never tries to open the real keyring.yaml.
+
+Must live at the repo root (not inside tests/) to fire before collection.
+"""
+from pathlib import Path
+import importlib
+
+_TEST_KEYRING = Path(__file__).parent / "tests" / "keyring_test.yaml"
+
+# Patch config before anything else imports it.
+import config as _config_mod  # NOTE(review): this first import already executes config, before the path is patched
+_config_mod._CONFIG_PATH = _TEST_KEYRING
+importlib.reload(_config_mod)  # NOTE(review): reload re-executes the module body — confirm it does not reassign _CONFIG_PATH
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..68f242a
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..0e52085
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,5 @@
+pytest>=8.0.0
+pytest-asyncio>=0.24.0
+pytest-bdd>=7.0.0
+pytest-recording>=0.13.0
+pytest-mock>=3.12.0
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..0fdbc21
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,167 @@
+"""
+Master test fixtures for PhoneWork BDD tests.
+"""
+from __future__ import annotations
+
+import asyncio
+import time
+from pathlib import Path
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+TESTS_DIR = Path(__file__).parent
+CASSETTES_DIR = TESTS_DIR / "cassettes"
+CASSETTES_DIR.mkdir(exist_ok=True)
+
+
+# ── Feishu send mock ─────────────────────────────────────────────────────────
+
+@pytest.fixture
+def feishu_calls():
+    """
+    Collect every call made to bot.feishu's send_text / send_card / send_file.
+    The three functions are patched on the module, so only callers that look
+    them up on bot.feishu at call time are intercepted; nothing is really sent.
+    """
+    log: dict[str, list] = dict(texts=[], cards=[], files=[])
+
+    async def record_text(receive_id, receive_id_type, text):
+        log["texts"].append(dict(receive_id=receive_id, text=text))
+
+    async def record_card(receive_id, receive_id_type, card):
+        log["cards"].append(dict(receive_id=receive_id, card=card))
+
+    async def record_file(receive_id, receive_id_type, file_path, file_type="stream"):
+        log["files"].append(dict(receive_id=receive_id, file_path=file_path))
+
+    with patch("bot.feishu.send_text", side_effect=record_text), \
+         patch("bot.feishu.send_card", side_effect=record_card), \
+         patch("bot.feishu.send_file", side_effect=record_file):
+        yield log
+
+
+# ── run_claude mock ──────────────────────────────────────────────────────────
+
+@pytest.fixture
+def mock_run_claude():
+    """
+    Patch run_claude both where it is defined (agent.pty_process) and where
+    agent.manager imported it; yields the one AsyncMock shared by both sites.
+    """
+    fake_run = AsyncMock(return_value="Claude Code: task complete.")
+    with patch("agent.pty_process.run_claude", fake_run), \
+         patch("agent.manager.run_claude", fake_run):
+        yield fake_run
+
+
+# ── Singleton state resets ───────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_manager():
+    from agent.manager import manager as session_manager
+    session_manager._sessions.clear()  # every test starts with no sessions
+    yield
+    session_manager._sessions.clear()  # and leaves none behind
+
+
+@pytest.fixture(autouse=True)
+def reset_agent():
+    """Clear the agent singleton's history, active-conversation, passthrough
+    and per-user lock containers before and after every test."""
+    from orchestrator.agent import agent
+    state_attrs = ("_history", "_active_conv", "_passthrough", "_user_locks")
+    for attr in state_attrs:
+        getattr(agent, attr).clear()
+    yield
+    # Attributes are looked up again in case the test rebound any of them.
+    for attr in state_attrs:
+        getattr(agent, attr).clear()
+
+
+@pytest.fixture(autouse=True)
+def reset_task_runner():
+    from agent.task_runner import task_runner as runner
+    runner._tasks.clear()  # no background tasks carried into the test
+    yield
+    runner._tasks.clear()  # none left behind for the next one
+
+
+@pytest.fixture(autouse=True)
+def reset_scheduler():
+    from agent.scheduler import scheduler
+    def purge():  # cancel anything held in scheduler._tasks (if the attribute exists), then drop all jobs
+        for pending in list(getattr(scheduler, "_tasks", {}).values()):
+            pending.cancel()
+        scheduler._jobs.clear()
+    purge()
+    yield
+    purge()
+
+
+@pytest.fixture(autouse=True)
+def reset_contextvars():
+    from orchestrator.tools import set_current_user, set_current_chat
+    for reset in (set_current_user, set_current_chat):
+        reset(None)
+    yield
+    for reset in (set_current_user, set_current_chat):
+        reset(None)
+
+
+@pytest.fixture(autouse=True)
+def reset_reply(pytestconfig):
+    """Clear _reply and _vcr_cassette before each test; pytestconfig is shared by the whole session, so either would leak between scenarios."""
+    pytestconfig._reply = pytestconfig._vcr_cassette = None
+    yield
+
+
+# ── Working directory isolation ──────────────────────────────────────────────
+
+@pytest.fixture
+def tmp_working_dir(tmp_path, monkeypatch):
+    import config as cfg
+    import orchestrator.tools as tools
+    for module in (cfg, tools):  # each module holds its own WORKING_DIR name, so both are redirected
+        monkeypatch.setattr(module, "WORKING_DIR", tmp_path)
+    tmp_path.joinpath("myproject").mkdir()  # pre-create a sample project directory
+    return tmp_path
+
+
+# ── VCR cassette factory ─────────────────────────────────────────────────────
+
+def make_vcr_cassette(cassette_name: str):
+    """
+    Build the vcrpy cassette context manager for *cassette_name*.
+    The record mode comes from the VCR_RECORD_MODE environment variable and
+    defaults to 'none' (replay only); set it to 'new_episodes' to record.
+    authorization / x-api-key headers are filtered out of recorded requests.
+    Skips the calling test when vcrpy is missing, or when the mode is 'none'
+    and the cassette file has not been recorded yet.
+    """
+    import os
+    try:
+        import vcr
+    except ImportError:
+        pytest.skip("vcrpy not installed")
+
+    mode = os.environ.get("VCR_RECORD_MODE", "none")
+    target = CASSETTES_DIR / cassette_name
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    if mode == "none" and not target.exists():
+        pytest.skip(f"No cassette recorded yet: {cassette_name}. Run with VCR_RECORD_MODE=new_episodes to record.")
+
+    recorder = vcr.VCR(
+        record_mode=mode,
+        match_on=["method", "scheme", "host", "port", "path", "body"],
+        filter_headers=["authorization", "x-api-key"],
+        decode_compressed_response=True,
+    )
+    return recorder.use_cassette(str(target))
+
+
+@pytest.fixture
+def vcr_cassette():
+    return make_vcr_cassette  # yields the factory itself; the test calls it with a cassette name
diff --git a/tests/features/agent/passthrough.feature b/tests/features/agent/passthrough.feature
new file mode 100644
index 0000000..36d11d3
--- /dev/null
+++ b/tests/features/agent/passthrough.feature
@@ -0,0 +1,19 @@
+Feature: Direct (passthrough) mode — bypass LLM for CC sessions
+
+  Background:
+    Given user "user_abc123" is sending commands
+    And run_claude returns "Done. Here is the result."
+
+  Scenario: Passthrough sends directly to CC without LLM
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    And direct mode is enabled for user "user_abc123"
+    When user sends agent message "run the tests"
+    Then run_claude was called
+    And reply contains "Done. Here is the result."
+
+  Scenario: Passthrough on missing session clears active conv
+    Given active session is "ghost_session_id" which does not exist
+    And direct mode is enabled for user "user_abc123"
+    When user sends agent message "hello"
+    Then active session for user "user_abc123" is None
diff --git a/tests/features/agent/routing.feature b/tests/features/agent/routing.feature
new file mode 100644
index 0000000..049fa8f
--- /dev/null
+++ b/tests/features/agent/routing.feature
@@ -0,0 +1,35 @@
+Feature: LLM smart routing — agent routes messages to correct tools
+
+  Background:
+    Given user "user_abc123" is in smart mode
+    And run_claude returns "I created the component for you."
+
+  @vcr
+  Scenario: Agent creates new session for project task
+    Given vcr cassette "agent/routing_new_session.yaml"
+    When user sends agent message "create a React app in todo_app folder"
+    Then agent created a session for user "user_abc123"
+    And reply is not empty
+
+  @vcr
+  Scenario: Agent answers general question without creating session
+    Given vcr cassette "agent/routing_general_qa.yaml"
+    When user sends agent message "what is a Python generator?"
+    Then no session is created for user "user_abc123"
+    And reply is not empty
+
+  @vcr
+  Scenario: Agent sends follow-up to existing session
+    Given user has active session "sess01" in "/tmp/proj1"
+    And vcr cassette "agent/routing_follow_up.yaml"
+    When user sends agent message "now add tests for that"
+    Then run_claude was called
+    And reply is not empty
+
+  @vcr
+  Scenario: Agent answers direct QA without tools when no active session
+    Given no active session for user "user_abc123"
+    And vcr cassette "agent/routing_direct_qa.yaml"
+    When user sends agent message "explain async/await in Python"
+    Then reply is not empty
+    And reply does not contain "Max iterations"
diff --git a/tests/features/commands/close.feature b/tests/features/commands/close.feature
new file mode 100644
index 0000000..26194ee
--- /dev/null
+++ b/tests/features/commands/close.feature
@@ -0,0 +1,38 @@
+Feature: /close command — terminate a session
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No sessions returns error
+    When user sends "/close"
+    Then reply contains "No sessions to close"
+
+  Scenario: Close active session by default
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    When user sends "/close"
+    Then reply contains "Closed session"
+    And session manager has 0 sessions for user "user_abc123"
+
+  Scenario: Close session by number
+    Given user has session "sess01" in "/tmp/proj1"
+    And user has session "sess02" in "/tmp/proj2"
+    When user sends "/close 1"
+    Then reply contains "Closed session"
+    And session manager has 1 session for user "user_abc123"
+
+  Scenario: Invalid number returns error
+    Given user has session "sess01" in "/tmp/proj1"
+    When user sends "/close 9"
+    Then reply contains "Invalid session number"
+
+  Scenario: Cannot close another user's session
+    Given session "sess01" in "/tmp/proj1" belongs to user "other_user"
+    When user sends "/close sess01"
+    Then reply contains "belongs to another user"
+
+  Scenario: Closing active session clears active conv
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    When user sends "/close"
+    Then active session for user "user_abc123" is None
diff --git a/tests/features/commands/direct_smart.feature b/tests/features/commands/direct_smart.feature
new file mode 100644
index 0000000..b55cdf8
--- /dev/null
+++ b/tests/features/commands/direct_smart.feature
@@ -0,0 +1,27 @@
+Feature: /direct and /smart mode toggle
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: /direct requires active session
+    When user sends "/direct"
+    Then reply contains "No active session"
+
+  Scenario: /direct enables passthrough mode
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    When user sends "/direct"
+    Then reply contains "Direct mode ON"
+    And passthrough mode is enabled for user "user_abc123"
+
+  Scenario: /smart disables passthrough mode
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    And direct mode is enabled for user "user_abc123"
+    When user sends "/smart"
+    Then reply contains "Smart mode ON"
+    And passthrough mode is disabled for user "user_abc123"
+
+  Scenario: /smart always succeeds even without active session
+    When user sends "/smart"
+    Then reply contains "Smart mode ON"
diff --git a/tests/features/commands/help.feature b/tests/features/commands/help.feature
new file mode 100644
index 0000000..82f549a
--- /dev/null
+++ b/tests/features/commands/help.feature
@@ -0,0 +1,25 @@
+Feature: /help command — show command reference
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: /help lists all commands
+    When user sends "/help"
+    Then reply contains "/new"
+    And reply contains "/status"
+    And reply contains "/close"
+    And reply contains "/switch"
+    And reply contains "/direct"
+    And reply contains "/smart"
+    And reply contains "/shell"
+    And reply contains "/remind"
+    And reply contains "/tasks"
+    And reply contains "/nodes"
+
+  Scenario: /h alias works
+    When user sends "/h"
+    Then reply contains "/new"
+
+  Scenario: Unknown command is not handled
+    When user sends "/unknown_xyz_cmd"
+    Then command is not handled
diff --git a/tests/features/commands/new.feature b/tests/features/commands/new.feature
new file mode 100644
index 0000000..433ae28
--- /dev/null
+++ b/tests/features/commands/new.feature
@@ -0,0 +1,37 @@
+Feature: /new command — create a Claude Code session
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No arguments shows usage
+    When user sends "/new"
+    Then reply contains "Usage: /new"
+
+  Scenario: Creates session with valid directory
+    Given run_claude returns "Session ready."
+    When user sends "/new myproject"
+    Then reply contains "myproject"
+    And session manager has 1 session for user "user_abc123"
+
+  Scenario: Creates session with initial message
+    Given run_claude returns "Fixed the bug."
+    When user sends "/new myproject fix the login bug"
+    Then reply contains "myproject"
+
+  Scenario: Path traversal attempt is blocked
+    When user sends "/new ../../etc"
+    Then reply contains "Error"
+    And session manager has 0 sessions for user "user_abc123"
+
+  Scenario: Custom timeout is accepted
+    Given run_claude returns "Done."
+    When user sends "/new myproject --timeout 60"
+    Then reply contains "myproject"
+    And reply contains "timeout: 60s"
+
+  Scenario: Creates session and sends card when chat_id is set
+    Given the current chat_id is "chat_xyz"
+    And run_claude returns "Ready."
+    When user sends "/new myproject"
+    Then a sessions card is sent to chat "chat_xyz"
+    And text reply is empty
diff --git a/tests/features/commands/nodes.feature b/tests/features/commands/nodes.feature
new file mode 100644
index 0000000..be78448
--- /dev/null
+++ b/tests/features/commands/nodes.feature
@@ -0,0 +1,13 @@
+Feature: /nodes and /node commands — multi-host node management
+
+  Background:
+    Given user "user_abc123" is sending commands
+    And ROUTER_MODE is disabled
+
+  Scenario: /nodes outside router mode returns explanation
+    When user sends "/nodes"
+    Then reply contains "Not in router mode"
+
+  Scenario: /node outside router mode returns explanation
+    When user sends "/node myhost"
+    Then reply contains "Not in router mode"
diff --git a/tests/features/commands/remind.feature b/tests/features/commands/remind.feature
new file mode 100644
index 0000000..a34fe4f
--- /dev/null
+++ b/tests/features/commands/remind.feature
@@ -0,0 +1,33 @@
+Feature: /remind command — schedule a one-time reminder
+
+  Background:
+    Given user "user_abc123" is sending commands
+    And the current chat_id is "chat_xyz"
+
+  Scenario: No arguments shows usage
+    When user sends "/remind"
+    Then reply contains "Usage: /remind"
+
+  Scenario: Missing message part shows usage
+    When user sends "/remind 10m"
+    Then reply contains "Usage: /remind"
+
+  Scenario: Invalid time format returns error
+    When user sends "/remind badtime check build"
+    Then reply contains "Invalid time format"
+
+  Scenario: Valid reminder with seconds is scheduled
+    When user sends "/remind 30s check the build"
+    Then reply contains "Reminder #"
+    And reply contains "30s"
+    And scheduler has 1 pending job
+
+  Scenario: Valid reminder with minutes is scheduled
+    When user sends "/remind 5m deploy done"
+    Then reply contains "5m"
+    And scheduler has 1 pending job
+
+  Scenario: Valid reminder with hours is scheduled
+    When user sends "/remind 2h weekly report"
+    Then reply contains "2h"
+    And scheduler has 1 pending job
diff --git a/tests/features/commands/shell.feature b/tests/features/commands/shell.feature
new file mode 100644
index 0000000..60a8467
--- /dev/null
+++ b/tests/features/commands/shell.feature
@@ -0,0 +1,22 @@
+Feature: /shell command — run host shell commands
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No arguments shows usage
+    When user sends "/shell"
+    Then reply contains "Usage: /shell"
+
+  Scenario: Runs echo and returns output
+    When user sends "/shell echo hello"
+    Then reply contains "hello"
+    And reply contains "exit code: 0"
+
+  Scenario: Blocked dangerous command is rejected
+    When user sends "/shell rm -rf /"
+    Then reply contains "Blocked"
+    And reply does not contain "exit code"
+
+  Scenario: Non-zero exit code is reported
+    When user sends "/shell exit 1"
+    Then reply contains "exit code"
diff --git a/tests/features/commands/status.feature b/tests/features/commands/status.feature
new file mode 100644
index 0000000..3e04b66
--- /dev/null
+++ b/tests/features/commands/status.feature
@@ -0,0 +1,40 @@
+Feature: /status command — list sessions and current mode
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No sessions returns empty message
+    When user sends "/status"
+    Then reply contains "No active sessions"
+
+  Scenario: Shows session list
+    Given user has session "sess01" in "/tmp/proj1"
+    And user has session "sess02" in "/tmp/proj2"
+    When user sends "/status"
+    Then reply contains "sess01"
+    And reply contains "sess02"
+
+  Scenario: Shows active marker on current session
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    When user sends "/status"
+    Then reply contains "→"
+
+  Scenario: Shows current mode as Smart by default
+    Given user has session "sess01" in "/tmp/proj1"
+    When user sends "/status"
+    Then reply contains "Smart"
+
+  Scenario: Shows Direct mode when direct mode is enabled
+    Given user has session "sess01" in "/tmp/proj1"
+    And active session is "sess01"
+    And direct mode is enabled for user "user_abc123"
+    When user sends "/status"
+    Then reply contains "Direct"
+
+  Scenario: Sends card when chat_id is set
+    Given user has session "sess01" in "/tmp/proj1"
+    And the current chat_id is "chat_xyz"
+    When user sends "/status"
+    Then a sessions card is sent to chat "chat_xyz"
+    And text reply is empty
diff --git a/tests/features/commands/switch.feature b/tests/features/commands/switch.feature
new file mode 100644
index 0000000..bc93b12
--- /dev/null
+++ b/tests/features/commands/switch.feature
@@ -0,0 +1,30 @@
+Feature: /switch command — activate a different session
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No sessions returns error
+    When user sends "/switch 1"
+    Then reply contains "No sessions available"
+
+  Scenario: Valid switch updates active session
+    Given user has session "sess01" in "/tmp/proj1"
+    And user has session "sess02" in "/tmp/proj2"
+    When user sends "/switch 2"
+    Then reply contains "Switched to session"
+    And active session for user "user_abc123" is "sess02"
+
+  Scenario: Out of range number returns error
+    Given user has session "sess01" in "/tmp/proj1"
+    When user sends "/switch 5"
+    Then reply contains "Invalid session number"
+
+  Scenario: Non-numeric argument returns error
+    Given user has session "sess01" in "/tmp/proj1"
+    When user sends "/switch notanumber"
+    Then reply contains "Invalid number"
+
+  Scenario: Missing argument shows usage
+    Given user has session "sess01" in "/tmp/proj1"
+    When user sends "/switch"
+    Then reply contains "Usage: /switch"
diff --git a/tests/features/commands/tasks.feature b/tests/features/commands/tasks.feature
new file mode 100644
index 0000000..ec1bb5d
--- /dev/null
+++ b/tests/features/commands/tasks.feature
@@ -0,0 +1,27 @@
+Feature: /tasks command — list background tasks
+
+  Background:
+    Given user "user_abc123" is sending commands
+
+  Scenario: No tasks returns empty message
+    When user sends "/tasks"
+    Then reply contains "No background tasks"
+
+  Scenario: Shows running task with hourglass emoji
+    Given there is a running task "task001" described as "CC session abc: fix bug"
+    When user sends "/tasks"
+    Then reply contains "task001"
+    And reply contains "fix bug"
+    And reply contains "⏳"
+
+  Scenario: Shows completed task with checkmark
+    Given there is a completed task "task002" described as "CC session xyz: deploy"
+    When user sends "/tasks"
+    Then reply contains "task002"
+    And reply contains "✅"
+
+  Scenario: Shows failed task with cross
+    Given there is a failed task "task003" described as "CC session err: bad cmd"
+    When user sends "/tasks"
+    Then reply contains "task003"
+    And reply contains "❌"
diff --git a/tests/keyring_test.yaml b/tests/keyring_test.yaml
new file mode 100644
index 0000000..dea01b4
--- /dev/null
+++ b/tests/keyring_test.yaml
@@ -0,0 +1,10 @@
+FEISHU_APP_ID: test_app_id
+FEISHU_APP_SECRET: test_app_secret
+OPENAI_BASE_URL: https://open.bigmodel.cn/api/paas/v4/
+OPENAI_API_KEY: test_api_key_for_vcr
+OPENAI_MODEL: glm-4.7
+WORKING_DIR: /tmp/phonework_test
+METASO_API_KEY: ""
+ROUTER_MODE: false
+ROUTER_SECRET: ""
+ALLOWED_OPEN_IDS: []
diff --git a/tests/step_defs/__init__.py b/tests/step_defs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/step_defs/common_steps.py b/tests/step_defs/common_steps.py
new file mode 100644
index 0000000..3c2c8ff
--- /dev/null
+++ b/tests/step_defs/common_steps.py
@@ -0,0 +1,183 @@
+"""
+Shared Given/Then step definitions used across all feature files.
+"""
+from __future__ import annotations
+
+from pytest_bdd import given, then, parsers
+
+
+# ── Given: user identity ─────────────────────────────────────────────────────
+
+@given(parsers.parse('user "{user_id}" is sending commands'))
+def set_user(user_id, pytestconfig):
+    from orchestrator.tools import set_current_user
+    set_current_user(user_id)
+    pytestconfig._test_user_id = user_id  # stashed so later steps can look up the scenario's user
+
+
+@given(parsers.parse('the current chat_id is "{chat_id}"'))
+def set_chat(chat_id):
+    from orchestrator.tools import set_current_chat
+    set_current_chat(chat_id)  # set back to None by the autouse reset_contextvars fixture
+
+
+# ── Given: session setup ─────────────────────────────────────────────────────
+
+@given(parsers.parse('user has session "{conv_id}" in "{cwd}"'))
+def add_session(conv_id, cwd, pytestconfig, tmp_path):
+    from agent.manager import manager, Session
+    user_id = getattr(pytestconfig, "_test_user_id", "user_abc123")  # falls back to the default test user
+    session = Session(conv_id=conv_id, cwd=str(tmp_path / conv_id), owner_id=user_id, cc_timeout=50.0)  # NOTE: the step's "{cwd}" text is not used; the session points into tmp_path
+    (tmp_path / conv_id).mkdir(exist_ok=True)
+    manager._sessions[conv_id] = session  # inserted straight into the manager's private _sessions mapping
+
+
+@given(parsers.parse('session "{conv_id}" in "{cwd}" belongs to user "{owner}"'))
+def add_foreign_session(conv_id, cwd, owner, tmp_path):
+    from agent.manager import manager, Session
+    session = Session(conv_id=conv_id, cwd=str(tmp_path / conv_id), owner_id=owner, cc_timeout=50.0)  # owner comes from the step text, not from the scenario's user; "{cwd}" is not used
+    (tmp_path / conv_id).mkdir(exist_ok=True)
+    manager._sessions[conv_id] = session  # inserted straight into the manager's private _sessions mapping
+
+
+@given(parsers.parse('active session is "{conv_id}"'))
+def set_active_session(conv_id, pytestconfig):
+    from orchestrator.agent import agent
+    user_id = getattr(pytestconfig, "_test_user_id", "user_abc123")
+    agent._active_conv[user_id] = conv_id  # written straight into the agent's state; no check that the session exists
+
+
+@given(parsers.parse('active session is "{conv_id}" which does not exist'))
+def set_ghost_active_session(conv_id, pytestconfig):
+    from orchestrator.agent import agent
+    user_id = getattr(pytestconfig, "_test_user_id", "user_abc123")
+    agent._active_conv[user_id] = conv_id
+    # Intentionally NOT added to manager._sessions, so the active id refers to no session.
+
+
+@given(parsers.parse('no active session for user "{user_id}"'))
+def ensure_no_active_session(user_id):
+    from orchestrator.agent import agent
+    agent._active_conv[user_id] = None  # stores an explicit None rather than removing the key
+
+
+# ── Given: mode toggles ──────────────────────────────────────────────────────
+
+@given(parsers.parse('direct mode is enabled for user "{user_id}"'))
+def enable_direct_mode(user_id):
+    from orchestrator.agent import agent
+    agent._passthrough[user_id] = True  # the same flag the "passthrough mode is enabled/disabled" steps assert on
+
+
+# ── Given: mocks ─────────────────────────────────────────────────────────────
+
+@given(parsers.parse('run_claude returns "{output}"'))
+def set_run_claude_return(output, mock_run_claude):
+    mock_run_claude.return_value = output  # replaces the mock_run_claude fixture's default canned output
+
+
+# ── Given: config ────────────────────────────────────────────────────────────
+
+@given("ROUTER_MODE is disabled")
+def disable_router_mode(monkeypatch):
+    import config
+    monkeypatch.setattr(config, "ROUTER_MODE", False)  # monkeypatch undoes this when the test ends
+
+
+# ── Then: reply assertions ───────────────────────────────────────────────────
+
+@then(parsers.parse('reply contains "{text}"'))
+def reply_contains(text, pytestconfig):
+    got = getattr(pytestconfig, "_reply", None)
+    haystack = got if got else ""  # a missing or None reply is searched as the empty string
+    assert text in haystack, f"Expected {text!r} in reply, got: {got!r}"
+
+
+@then(parsers.parse('reply does not contain "{text}"'))
+def reply_not_contains(text, pytestconfig):
+    got = getattr(pytestconfig, "_reply", None)
+    haystack = got if got else ""  # a missing or None reply trivially contains nothing
+    assert text not in haystack, f"Expected {text!r} NOT in reply, got: {got!r}"
+
+
+@then("reply is not empty")
+def reply_not_empty(pytestconfig):
+    got = getattr(pytestconfig, "_reply", None)
+    has_text = bool(got) and bool(got.strip())  # None, "" and whitespace-only replies all fail
+    assert has_text, f"Expected non-empty reply, got: {got!r}"
+
+
+@then("text reply is empty")
+def reply_is_empty(pytestconfig):
+    got = getattr(pytestconfig, "_reply", None)
+    # Must be exactly "" — a None reply does not count as empty here.
+    assert got == "", f"Expected empty reply, got: {got!r}"
+
+
+@then("command is not handled")
+def command_not_handled(pytestconfig):
+    reply = getattr(pytestconfig, "_reply", None)
+    assert reply is None  # None, as opposed to "" — compare the separate "text reply is empty" step
+
+
+# ── Then: session state ──────────────────────────────────────────────────────
+
+@then(parsers.parse('session manager has {count:d} session for user "{user_id}"'))
+@then(parsers.parse('session manager has {count:d} sessions for user "{user_id}"'))
+def check_session_count(count, user_id):
+    from agent.manager import manager
+    owned = manager.list_sessions(user_id=user_id)
+    found = len(owned)  # the singular and plural step wordings share this check
+    assert found == count, f"Expected {count} sessions, got {found}: {owned}"
+
+
+@then(parsers.parse('active session for user "{user_id}" is "{conv_id}"'))
+def check_active_session(user_id, conv_id):
+    from orchestrator.agent import agent
+    assert agent._active_conv.get(user_id) == conv_id  # .get(): a user with no entry compares as None and fails
+
+
+@then(parsers.parse('active session for user "{user_id}" is None'))
+def check_no_active_session(user_id):
+    from orchestrator.agent import agent
+    assert agent._active_conv.get(user_id) is None  # holds for a missing key as well as an explicit None
+
+
+# ── Then: mode state ─────────────────────────────────────────────────────────
+
+@then(parsers.parse('passthrough mode is enabled for user "{user_id}"'))
+def check_passthrough_on(user_id):
+    from orchestrator.agent import agent
+    assert agent._passthrough.get(user_id) is True  # identity check: truthy non-bool values do not pass
+
+
+@then(parsers.parse('passthrough mode is disabled for user "{user_id}"'))
+def check_passthrough_off(user_id):
+    from orchestrator.agent import agent
+    assert agent._passthrough.get(user_id) is False  # a missing key yields None and fails — the flag must be explicitly False
+
+
+# ── Then: Feishu output ──────────────────────────────────────────────────────
+
+@then(parsers.parse('a sessions card is sent to chat "{chat_id}"'))
+def check_card_sent(chat_id, feishu_calls):
+    sent = feishu_calls["cards"]
+    delivered = any(entry["receive_id"] == chat_id for entry in sent)  # only the recipient is checked, not the card body
+    assert delivered, f"No card sent to {chat_id!r}, captured: {sent}"
+
+
+# ── Then: scheduler ──────────────────────────────────────────────────────────
+
+@then(parsers.parse('scheduler has {count:d} pending job'))
+@then(parsers.parse('scheduler has {count:d} pending jobs'))
+def check_scheduler_jobs(count):
+    from agent.scheduler import scheduler
+    pending = len(scheduler._jobs)  # counts every entry currently in _jobs
+    assert pending == count, f"Expected {count} jobs, got {pending}"
+
+
+# ── Then: run_claude ─────────────────────────────────────────────────────────
+
+@then("run_claude was called")
+def check_run_claude_called(mock_run_claude):
+    assert mock_run_claude.called, "Expected run_claude to be called"
diff --git a/tests/step_defs/test_agent.py b/tests/step_defs/test_agent.py
new file mode 100644
index 0000000..4031c55
--- /dev/null
+++ b/tests/step_defs/test_agent.py
@@ -0,0 +1,126 @@
+"""
+Step definitions for agent routing and passthrough features.
+"""
+from __future__ import annotations
+
+import asyncio
+from contextlib import nullcontext
+
+from pytest_bdd import scenarios, given, when, then, parsers
+
+from tests.step_defs.common_steps import *  # noqa: F401,F403 — import shared steps
+
+scenarios(
+    "../features/agent/routing.feature",
+    "../features/agent/passthrough.feature",
+)
+
+# NOTE(review): steps below share state through attributes on ``pytestconfig``
+# (``_test_user_id``, ``_vcr_cassette``, ``_reply``). That fixture is
+# session-scoped, so values persist across scenarios unless something resets
+# them — confirm a fixture in conftest does.
+
+# Fallback user id for scenarios that never ran a user-selecting Given step.
+_DEFAULT_USER_ID = "user_abc123"
+
+
+def _usable_event_loop():
+    """Return an open event loop for driving coroutines from sync steps.
+
+    ``asyncio.get_event_loop()`` raises ``RuntimeError`` on Python 3.14+ when
+    no current loop is set, and a closed loop cannot run anything, so a fresh
+    loop is created and installed in either case.
+    """
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = None
+    if loop is None or loop.is_closed():
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+    return loop
+
+
+# ── Given: agent-specific setup ──────────────────────────────────────────────
+
+@given(parsers.parse('user "{user_id}" is in smart mode'))
+def set_smart_mode(user_id, pytestconfig):
+    """Make ``user_id`` the current user with passthrough switched off.
+
+    The id is also stored on ``pytestconfig`` for the steps that follow.
+    """
+    from orchestrator.agent import agent
+    from orchestrator.tools import set_current_user
+    set_current_user(user_id)
+    agent._passthrough[user_id] = False
+    pytestconfig._test_user_id = user_id
+
+
+@given(parsers.parse('user has active session "{conv_id}" in "{cwd}"'))
+def add_and_activate_session(conv_id, cwd, pytestconfig, tmp_path):
+    """Register a session owned by the scenario's user and mark it active.
+
+    ``cwd`` is accepted only because the step text carries it; the session's
+    working directory is always ``tmp_path / conv_id``.
+    """
+    from agent.manager import manager, Session
+    from orchestrator.agent import agent
+    user_id = getattr(pytestconfig, "_test_user_id", _DEFAULT_USER_ID)
+    session_dir = tmp_path / conv_id
+    session = Session(
+        conv_id=conv_id,
+        cwd=str(session_dir),
+        owner_id=user_id,
+        cc_timeout=50.0,
+    )
+    session_dir.mkdir(exist_ok=True)
+    manager._sessions[conv_id] = session
+    agent._active_conv[user_id] = conv_id
+
+
+@given(parsers.parse('vcr cassette "{cassette_name}"'))
+def set_vcr_cassette(cassette_name, pytestconfig):
+    """Select the cassette that ``send_agent_message`` runs the agent under."""
+    pytestconfig._vcr_cassette = cassette_name
+
+
+# ── When: send message through agent ─────────────────────────────────────────
+
+@when(parsers.parse('user sends agent message "{text}"'))
+def send_agent_message(text, pytestconfig, mock_run_claude, feishu_calls):
+    """Run ``agent.run`` to completion and store its result as the reply.
+
+    The call is wrapped in the selected VCR cassette when one was set.
+    ``mock_run_claude`` and ``feishu_calls`` are unused in the body; they are
+    presumably requested so their mocks are active — verify against conftest.
+    """
+    from orchestrator.agent import agent
+    from tests.conftest import make_vcr_cassette
+    user_id = getattr(pytestconfig, "_test_user_id", _DEFAULT_USER_ID)
+    cassette_name = getattr(pytestconfig, "_vcr_cassette", None)
+    loop = _usable_event_loop()
+    recording = make_vcr_cassette(cassette_name) if cassette_name else nullcontext()
+
+    with recording:
+        reply = loop.run_until_complete(agent.run(user_id, text))
+
+    pytestconfig._reply = reply
+
+
+# ── Then: agent-specific assertions ─────────────────────────────────────────
+
+@then(parsers.parse('agent created a session for user "{user_id}"'))
+def check_session_created(user_id):
+    """Assert that an active conversation is recorded for ``user_id``."""
+    from orchestrator.agent import agent
+    assert agent._active_conv.get(user_id) is not None, \
+        f"Expected active session to be set for {user_id}"
+
+
+@then(parsers.parse('no session is created for user "{user_id}"'))
+def check_no_session(user_id):
+    """Assert that no active conversation is recorded for ``user_id``."""
+    from orchestrator.agent import agent
+    active = agent._active_conv.get(user_id)
+    assert active is None, \
+        f"Expected no active session for {user_id}, got {active}"
diff --git a/tests/step_defs/test_commands.py b/tests/step_defs/test_commands.py
new file mode 100644
index 0000000..d0239ae
--- /dev/null
+++ b/tests/step_defs/test_commands.py
@@ -0,0 +1,100 @@
+"""
+Step definitions for all slash command features.
+"""
+from __future__ import annotations
+
+import asyncio
+import time
+
+from pytest_bdd import scenarios, given, when, then, parsers
+
+from tests.step_defs.common_steps import *  # noqa: F401,F403 — import shared steps
+
+scenarios(
+    "../features/commands/new.feature",
+    "../features/commands/status.feature",
+    "../features/commands/switch.feature",
+    "../features/commands/close.feature",
+    "../features/commands/direct_smart.feature",
+    "../features/commands/shell.feature",
+    "../features/commands/remind.feature",
+    "../features/commands/tasks.feature",
+    "../features/commands/nodes.feature",
+    "../features/commands/help.feature",
+)
+
+
+def _usable_event_loop():
+    """Return an open event loop for driving coroutines from sync steps.
+
+    ``asyncio.get_event_loop()`` raises ``RuntimeError`` on Python 3.14+ when
+    no current loop is set, and a closed loop cannot run anything, so a fresh
+    loop is created and installed in either case.
+    """
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = None
+    if loop is None or loop.is_closed():
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+    return loop
+
+
+# ── When: send slash command ─────────────────────────────────────────────────
+
+@when(parsers.parse('user sends "{text}"'))
+def send_command(text, pytestconfig, feishu_calls, mock_run_claude):
+    """Run ``handle_command`` to completion and store its result as the reply.
+
+    ``feishu_calls`` and ``mock_run_claude`` are unused in the body; they are
+    presumably requested so their mocks are active — verify against conftest.
+    """
+    from bot.commands import handle_command
+    user_id = getattr(pytestconfig, "_test_user_id", "user_abc123")
+    loop = _usable_event_loop()
+    pytestconfig._reply = loop.run_until_complete(handle_command(user_id, text))
+
+
+# ── Given: task runner state ─────────────────────────────────────────────────
+
+@given(parsers.parse('there is a running task "{task_id}" described as "{desc}"'))
+def add_running_task(task_id, desc):
+    """Register a task that started just now and is still running."""
+    from agent.task_runner import task_runner, BackgroundTask, TaskStatus
+    task_runner._tasks[task_id] = BackgroundTask(
+        task_id=task_id,
+        description=desc,
+        started_at=time.time(),
+        status=TaskStatus.RUNNING,
+    )
+
+
+@given(parsers.parse('there is a completed task "{task_id}" described as "{desc}"'))
+def add_completed_task(task_id, desc):
+    """Register a task that completed five seconds after it started."""
+    from agent.task_runner import task_runner, BackgroundTask, TaskStatus
+    finished_at = time.time()
+    task_runner._tasks[task_id] = BackgroundTask(
+        task_id=task_id,
+        description=desc,
+        started_at=finished_at - 5,
+        status=TaskStatus.COMPLETED,
+        completed_at=finished_at,
+        result="success",
+    )
+
+
+@given(parsers.parse('there is a failed task "{task_id}" described as "{desc}"'))
+def add_failed_task(task_id, desc):
+    """Register a task that failed three seconds after it started."""
+    from agent.task_runner import task_runner, BackgroundTask, TaskStatus
+    finished_at = time.time()
+    task_runner._tasks[task_id] = BackgroundTask(
+        task_id=task_id,
+        description=desc,
+        started_at=finished_at - 3,
+        status=TaskStatus.FAILED,
+        completed_at=finished_at,
+        error="subprocess failed",
+    )