|
@@ -4,11 +4,12 @@ Tunnel session management service for SSH and Dashboard tunnels.
|
|
|
|
|
|
|
|
import asyncio
|
|
import asyncio
|
|
|
import os
|
|
import os
|
|
|
|
|
+import re
|
|
|
import signal
|
|
import signal
|
|
|
import subprocess
|
|
import subprocess
|
|
|
import uuid as uuid_module
|
|
import uuid as uuid_module
|
|
|
from datetime import datetime, timedelta
|
|
from datetime import datetime, timedelta
|
|
|
-from typing import Dict, Optional
|
|
|
|
|
|
|
+from typing import Dict, Optional, List, Tuple
|
|
|
|
|
|
|
|
from pydantic import BaseModel
|
|
from pydantic import BaseModel
|
|
|
|
|
|
|
@@ -47,57 +48,52 @@ class TunnelService:
|
|
|
def __init__(self):
|
|
def __init__(self):
|
|
|
self.sessions: Dict[str, TunnelSession] = {}
|
|
self.sessions: Dict[str, TunnelSession] = {}
|
|
|
self.tunnel_status: Dict[str, TunnelStatus] = {}
|
|
self.tunnel_status: Dict[str, TunnelStatus] = {}
|
|
|
- self.cleanup_task = None
|
|
|
|
|
-
|
|
|
|
|
- def start_background_cleanup(self):
|
|
|
|
|
- """Start background task for cleanup inactive sessions"""
|
|
|
|
|
- if not self.cleanup_task:
|
|
|
|
|
- self.cleanup_task = asyncio.create_task(self._cleanup_loop())
|
|
|
|
|
-
|
|
|
|
|
- async def _cleanup_loop(self):
|
|
|
|
|
- """Background cleanup loop"""
|
|
|
|
|
- while True:
|
|
|
|
|
- await asyncio.sleep(300) # Every 5 minutes
|
|
|
|
|
- await self.cleanup_inactive_sessions()
|
|
|
|
|
|
|
|
|
|
async def cleanup_inactive_sessions(self) -> List[Tuple[str, str]]:
    """
    Remove stale sessions and kill their ttyd processes.

    A session is cleaned up when the first matching rule below applies:
      1. ``now`` is past ``session.expires_at``;
      2. ttyd was spawned, no heartbeat was ever received, and the session
         was created more than 2 minutes ago (tab never opened);
      3. the last heartbeat is more than 60 minutes old;
      4. the last heartbeat is more than 60 seconds old and the ttyd
         process is no longer alive.

    Returns list of (device_id, tunnel_type) tuples for tunnels that should
    be disabled, one entry per removed session, in iteration order.
    """
    now = datetime.now()
    inactive_threshold = now - timedelta(minutes=60)
    grace_period = now - timedelta(seconds=60)
    initial_grace = now - timedelta(minutes=2)

    tunnels_to_disable = []  # List of (device_id, tunnel_type) to disable

    # Iterate over a snapshot because entries are deleted inside the loop.
    for session_uuid, session in list(self.sessions.items()):
        reason = None

        if now > session.expires_at:
            # Hard limit reached
            reason = "Session expired"
        elif (session.ttyd_pid and not session.last_heartbeat
              and session.created_at < initial_grace):
            # ttyd spawned but the tab was never opened
            reason = "Session never opened (no heartbeat)"
        elif session.last_heartbeat and session.last_heartbeat < inactive_threshold:
            reason = "Session inactive for 60 min"
        elif session.last_heartbeat and session.last_heartbeat < grace_period:
            # Grace period: if tab closed, wait 60 seconds, then only clean
            # up once the ttyd process itself has gone away.
            if session.ttyd_pid and not self._is_process_alive(session.ttyd_pid):
                reason = "ttyd process dead"

        if reason is None:
            continue

        print(f"[tunnel] {reason}: {session_uuid}")
        # A session can expire before ttyd was ever spawned; there is
        # nothing to kill in that case, so do not pass a missing pid on.
        if session.ttyd_pid:
            self._kill_ttyd(session.ttyd_pid)
        del self.sessions[session_uuid]

        # Mark tunnel for disabling on device
        tunnels_to_disable.append((session.device_id, session.tunnel_type))

    return tunnels_to_disable
|
|
|
|
|
|
|
|
def create_session(
|
|
def create_session(
|
|
|
self,
|
|
self,
|
|
@@ -115,7 +111,7 @@ class TunnelService:
|
|
|
admin_user=admin_user,
|
|
admin_user=admin_user,
|
|
|
tunnel_type=tunnel_type,
|
|
tunnel_type=tunnel_type,
|
|
|
created_at=now,
|
|
created_at=now,
|
|
|
- expires_at=now + timedelta(hours=1),
|
|
|
|
|
|
|
+ expires_at=now + timedelta(minutes=120), # 2 hours
|
|
|
status="waiting"
|
|
status="waiting"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -290,6 +286,120 @@ class TunnelService:
|
|
|
except ProcessLookupError:
|
|
except ProcessLookupError:
|
|
|
return False
|
|
return False
|
|
|
|
|
|
|
|
|
|
+ def _is_port_listening(self, port: int) -> bool:
|
|
|
|
|
+ """Check if port is listening (tunnel is open)"""
|
|
|
|
|
+ try:
|
|
|
|
|
+ result = subprocess.run(
|
|
|
|
|
+ ["ss", "-tln"],
|
|
|
|
|
+ capture_output=True,
|
|
|
|
|
+ text=True,
|
|
|
|
|
+ timeout=5
|
|
|
|
|
+ )
|
|
|
|
|
+ # Look for port in LISTEN state
|
|
|
|
|
+ return f":{port}" in result.stdout
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[watchdog] Error checking port {port}: {e}")
|
|
|
|
|
+ return False
|
|
|
|
|
+
|
|
|
|
|
+ def _get_running_ttyd_processes(self) -> List[Tuple[int, int, int]]:
|
|
|
|
|
+ """
|
|
|
|
|
+ Get all running ttyd processes for tunnels.
|
|
|
|
|
+ Returns: List of (pid, ttyd_port, ssh_tunnel_port)
|
|
|
|
|
+ """
|
|
|
|
|
+ try:
|
|
|
|
|
+ result = subprocess.run(
|
|
|
|
|
+ ["ps", "aux"],
|
|
|
|
|
+ capture_output=True,
|
|
|
|
|
+ text=True,
|
|
|
|
|
+ timeout=5
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ processes = []
|
|
|
|
|
+ for line in result.stdout.split('\n'):
|
|
|
|
|
+ # Look for: ttyd --port 45XXX --writable ssh -p 50XXX
|
|
|
|
|
+ if 'ttyd' in line and '--port 45' in line:
|
|
|
|
|
+ # Extract PID
|
|
|
|
|
+ parts = line.split()
|
|
|
|
|
+ pid = int(parts[1])
|
|
|
|
|
+
|
|
|
|
|
+ # Extract ttyd port (--port 45XXX)
|
|
|
|
|
+ port_match = re.search(r'--port (\d+)', line)
|
|
|
|
|
+ # Extract SSH tunnel port (-p 50XXX or -p 60XXX)
|
|
|
|
|
+ ssh_port_match = re.search(r'ssh -p (\d+)', line)
|
|
|
|
|
+
|
|
|
|
|
+ if port_match and ssh_port_match:
|
|
|
|
|
+ ttyd_port = int(port_match.group(1))
|
|
|
|
|
+ ssh_port = int(ssh_port_match.group(1))
|
|
|
|
|
+ processes.append((pid, ttyd_port, ssh_port))
|
|
|
|
|
+
|
|
|
|
|
+ return processes
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[watchdog] Error getting ttyd processes: {e}")
|
|
|
|
|
+ return []
|
|
|
|
|
+
|
|
|
|
|
async def watchdog_cleanup(self) -> List[Tuple[str, str]]:
    """
    Watchdog: Kill orphaned ttyd processes and cleanup stale sessions.

    Step 1 inspects the ttyd processes reported by
    ``_get_running_ttyd_processes`` (so it also covers processes that have
    no in-memory session) and kills every one whose SSH tunnel port is no
    longer listening. A session owning such a process is removed right
    away, so step 2 does not signal the same PID again or report the
    tunnel twice.

    Step 2 removes in-memory sessions when the first matching rule applies:
    expired, never opened (ttyd spawned, no heartbeat 2 minutes after
    creation), last heartbeat older than 60 minutes, or last heartbeat
    older than 60 seconds (tab closed).

    NOTE(review): ``_is_port_listening`` returning False leads to a kill
    here - confirm it cannot return False merely because the check failed.

    Returns: List of (device_id, tunnel_type) to disable in config, without
    duplicates, in the order they were found.
    """
    tunnels_to_disable = []

    def mark_for_disable(session):
        # Keep first-seen order and report each tunnel only once.
        entry = (session.device_id, session.tunnel_type)
        if entry not in tunnels_to_disable:
            tunnels_to_disable.append(entry)

    now = datetime.now()

    print("[watchdog] Running tunnel watchdog...")

    # 1. Check all running ttyd processes
    ttyd_processes = self._get_running_ttyd_processes()
    print(f"[watchdog] Found {len(ttyd_processes)} ttyd processes")

    for pid, ttyd_port, tunnel_port in ttyd_processes:
        # Tunnel port still open: leave the process alone
        if self._is_port_listening(tunnel_port):
            continue

        print(f"[watchdog] Tunnel port {tunnel_port} closed, killing ttyd {pid} (port {ttyd_port})")
        self._kill_ttyd(pid)

        # Find the owning session, drop it and mark for config update
        for session_uuid, session in list(self.sessions.items()):
            if session.ttyd_pid == pid:
                del self.sessions[session_uuid]
                mark_for_disable(session)
                break

    # 2. Check in-memory sessions
    grace_period = now - timedelta(seconds=60)
    inactive_threshold = now - timedelta(minutes=60)
    initial_grace = now - timedelta(minutes=2)

    # Snapshot, because entries are deleted inside the loop.
    for session_uuid, session in list(self.sessions.items()):
        reason = None

        if now > session.expires_at:
            # Hard limit reached
            reason = "Session expired (120 min)"
        elif (session.ttyd_pid and not session.last_heartbeat
              and session.created_at < initial_grace):
            # Tab was never opened
            reason = "Session never opened (no heartbeat)"
        elif session.last_heartbeat and session.last_heartbeat < inactive_threshold:
            reason = "Session inactive for 60 min"
        elif session.last_heartbeat and session.last_heartbeat < grace_period:
            # Grace period: if tab closed, wait 60 seconds
            reason = "Tab closed (60s grace period)"

        if reason is None:
            continue

        print(f"[watchdog] {reason}: {session_uuid}")
        # Sessions that never spawned ttyd have no pid to kill.
        if session.ttyd_pid:
            self._kill_ttyd(session.ttyd_pid)
        del self.sessions[session_uuid]
        mark_for_disable(session)

    return tunnels_to_disable
|
|
|
|
|
+
|
|
|
|
|
|
|
|
# Global tunnel service instance
|
|
# Global tunnel service instance
|
|
|
tunnel_service = TunnelService()
|
|
tunnel_service = TunnelService()
|