Résumé IA
Ce tutoriel présente la construction d'un agent de décision en streaming capable d'agir dans un environnement dynamique avec des obstacles mobiles et un objectif changeant. L'agent utilise un planificateur A* en horizon glissant qui réévalue fréquemment la trajectoire et peut annuler son plan en cours d'exécution si une étape devient invalide ou risquée. Le raisonnement intermédiaire est émis en continu sous forme d'événements structurés via Pydantic, permettant une adaptation réactive à chaque étape sans suivre aveuglément une trajectoire obsolète.
# In this tutorial, we build a Streaming Decision Agent that thinks and acts in
# an online, changing environment while continuously streaming safe, partial
# reasoning updates. We implement a dynamic grid world with moving obstacles
# and a shifting goal, then use an online A* planner in a receding-horizon loop
# to commit to only a few near-term moves and re-evaluate frequently. As we
# execute, we make intermediate decisions that can override the plan when a
# step becomes invalid or locally risky, allowing us to adapt mid-run rather
# than unthinkingly following a stale trajectory.
import math
import random
import time
from collections import defaultdict, deque
from dataclasses import dataclass, field
from typing import Any, Dict, Generator, List, Optional, Tuple

try:
    from pydantic import BaseModel, Field
except ImportError as exc:
    # Only a failed import means pydantic is missing. Any other exception
    # raised while importing it (e.g. a broken install) is left to propagate
    # unchanged instead of being reported as "please install".
    raise RuntimeError(
        "Please install pydantic: `!pip -q install pydantic` (then rerun)."
    ) from exc


class StreamEvent(BaseModel):
    """One structured update in the agent's event stream.

    Every field except ``data`` is required; ``data`` defaults to a fresh
    empty dict for each event.

    Attributes:
        t: Wall-clock time, in seconds since start.
        kind: Event type, e.g. plan/update/act/observe/done.
        step: Agent step counter.
        msg: Human-readable partial reasoning summary.
        data: Structured payload accompanying the event.
    """

    t: float = Field(..., description="Wall-clock time (seconds since start)")
    kind: str = Field(..., description="event type, e.g., plan/update/act/observe/done")
    step: int = Field(..., description="agent step counter")
    msg: str = Field(..., description="human-readable partial reasoning summary")
    data: Dict[str, Any] = Field(default_factory=dict, description="structured payload")


# An (x, y) cell position on the grid.
Coord = Tuple[int, int]

# We define the streaming event schema and core type structures that allow us
# to emit structured reasoning updates. We use Pydantic to formalize the
# structure of each streamed decision or observation safely and consistently.
# We establish the foundational interface that powers incremental reasoning
# throughout the agent lifecycle.
Copy Code Copied Use a different Browser @dataclass class DynamicGridWorld: w: int = 18 h: int = 10 obstacle_ratio: float = 0.18 seed: int = 7 move_obstacles_every: int = 6 spawn_obstacle_prob: float = 0.25 clear_obstacle_prob: float = 0.15 target_jitter_prob: float = 0.35 rng: random.Random = field(init=False) obstacles: set = field(init=False, default_factory=set) agent: Coord = field(init=False, default=(1, 1)) target: Coord = field(init=False, default=(15, 7)) step_count: int = field(init=False, default=0) def __post_init__(self): self.rng = random.Random(self.seed) self.reset() def reset(self): self.step_count = 0 self.obstacles = set() for y in range(self.h): for x in range(self.w): if (x, y) in [(1, 1), (self.w - 2, self.h - 2)]: continue if self.rng.random() < self.obstacle_ratio: self.obstacles.add((x, y)) self.agent = (1, 1) self.target = (self.w - 2, self.h - 2) self._ensure_free(self.agent) self._ensure_free(self.target) def _ensure_free(self, c: Coord): if c in self.obstacles: self.obstacles.remove(c) def in_bounds(self, c: Coord) -> bool: x, y = c return 0 <= x < self.w and 0 <= y < self.h def passable(self, c: Coord) -> bool: return c not in self.obstacles def neighbors4(self, c: Coord) -> List[Coord]: x, y = c cand = [(x+1,y), (x-1,y), (x,y+1), (x,y-1)] return [p for p in cand if self.in_bounds(p) and self.passable(p)] def manhattan(self, a: Coord, b: Coord) -> int: return abs(a[0]-b[0]) + abs(a[1]-b[1]) def maybe_world_changes(self) -> Dict[str, Any]: changes = {"obstacles_added": [], "obstacles_cleared": [], "target_moved": False} self.step_count += 1 if self.rng.random() < self.target_jitter_prob: tx, ty = self.target options = [(tx+1,ty),(tx-1,ty),(tx,ty+1),(tx,ty-1)] options = [c for c in options if self.in_bounds(c) and c != self.agent] self.rng.shuffle(options) for c in options[:3]: if c not in self.obstacles: self.target = c changes["target_moved"] = True break if self.step_count % self.move_obstacles_every == 0: for _ in range(4): if 
self.rng.random() < self.clear_obstacle_prob and self.obstacles: c = self.rng.choice(tuple(self.obstacles)) self.obstacles.remove(c) changes["obstacles_cleared"].append(c) for _ in range(5): if self.rng.random() < self.spawn_obstacle_prob: c = (self.rng.randrange(self.w), self.rng.randrange(self.h)) if c != self.agent and c != self.target: self.obstacles.add(c) changes["obstacles_added"].append(c) self._ensure_free(self.agent) self._ensure_free(self.target) return changes def step(self, action: str) -> Dict[str, Any]: ax, ay = self.agent move = {"R": (ax+1, ay), "L": (ax-1, ay), "D": (ax, ay+1), "U": (ax, ay-1), "S": (ax, ay)}[action] moved = False if self.in_bounds(move) and self.passable(move): self.agent = move moved = True changes = self.maybe_world_changes() done = (self.agent == self.target) return {"moved": moved, "agent": self.agent, "target": self.target, "done": done, "changes": changes} def render(self, path: Optional[List[Coord]] = None) -> str: path_set = set(path or []) lines = [] for y in range(self.h): row = [] for x in range(self.w): c = (x, y) if c == self.agent: row.append("A") elif c == self.target: row.append("T") elif c in path_set: row.append("·") elif c in self.obstacles: row.append("█") else: row.append(" ") lines.append("".join(row)) border = "+" + "-" *