1
0
Fork 0
mirror of https://github.com/SeriousBug/dotfiles synced 2026-06-17 04:45:20 -05:00
dotfiles/claude/skills/cc-compact/compact_session.py
Kaan Barmore-Genc b0070920e3 Add cc-compact skill and claude-token-count CLI
- cc-compact: skill to reload a past Claude Code session's context
  (resolves by id or ai-title, extracts a bounded XML summary via
  compact_session.py) so work can be resumed without ingesting the log
- migrate the existing new-work skill into the repo
- deploy ~/.claude/skills via ensure_dir_symlink in setup.sh
- claude-token-count: count tokens via Anthropic's count_tokens endpoint,
  pulling the API key from 1Password at runtime
2026-05-31 02:28:42 -05:00

263 lines
10 KiB
Python
Executable file

#!/usr/bin/env python3
"""Compact a Claude Code session log into a small, bounded summary.
This reads a session JSONL file *carefully* — it never dumps the whole
history. It extracts only the few signals needed to understand what the
session was about and what the agent was doing at the end:
- header metadata (project, branch, time span, message counts)
- the first few exchanges (user prompt + the agent's reply, truncated)
- a few exchanges randomly sampled from the middle (non-overlapping)
- the last few exchanges (user prompt + the agent's reply, truncated)
- the most-edited files (top N by edit count)
- the final assistant text (what it was saying last)
- the last few tool calls (what it was doing last)
Session resolution (pick one):
--file PATH use this JSONL file directly
--id UUID find <UUID>.jsonl under the projects dir
--title TEXT find the session whose ai-title contains TEXT
(case-insensitive substring; newest match wins)
Sessions live at: ~/.claude/projects/<encoded-cwd>/<session-id>.jsonl
"""
import argparse
import glob
import json
import os
import random
import sys
# Root directory searched (recursively) for session .jsonl files.
PROJECTS_DIR = os.path.expanduser("~/.claude/projects")

# File-modifying tools, each mapped to the name of the tool-input field that
# carries the path being modified. Three of them share the same field name.
EDIT_TOOLS = {
    **dict.fromkeys(("Edit", "Write", "MultiEdit"), "file_path"),
    "NotebookEdit": "notebook_path",
}
def truncate(text, n):
    """Collapse whitespace to a single line, then clip to at most n chars.

    Use for compact one-liners like tool arguments. When the text has to be
    cut, the last kept position is taken by an ellipsis so the reader can see
    that something was dropped; the result never exceeds n characters.

    Args:
        text: the string to flatten and clip.
        n: maximum length of the result. Values <= 0 yield an empty string
            whenever clipping is required.

    Returns:
        The flattened text, unchanged if it already fits, otherwise its first
        n - 1 characters followed by "…".
    """
    text = " ".join(text.split())
    if len(text) <= n:
        return text
    if n <= 0:
        # A negative slice bound would count from the end of the string and
        # return almost the whole text instead of nothing.
        return ""
    return text[: n - 1] + "…"
def clip(text, n):
    """Cut text down to n characters without flattening its layout.

    Leading and trailing newlines are removed first; interior newlines and
    indentation are kept. When the text is longer than n, the first n
    characters are kept, trailing whitespace is stripped from that prefix,
    and the marker " …[truncated]" is appended (so the result can be longer
    than n). Use for quoted messages where structure matters.
    """
    body = text.strip("\n")
    if len(body) > n:
        kept = body[:n].rstrip()
        return kept + " …[truncated]"
    return body
def esc(text):
    """Stringify text and replace &, < and > with their XML entities.

    Any value is accepted (it is passed through str() first). Quote
    characters are left untouched.
    """
    entities = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
    return str(text).translate(entities)
def content_to_text(content):
    """Flatten a message's .content into plain text.

    A string is returned as-is. For a list, the non-empty "text" values of
    dict blocks whose "type" is "text" are joined with newlines; every other
    block is ignored. Any other content type yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    pieces = []
    for block in content:
        if not isinstance(block, dict) or block.get("type") != "text":
            continue
        piece = block.get("text", "")
        if piece:
            pieces.append(piece)
    return "\n".join(pieces)
def is_genuine_prompt(rec):
    """Tell whether rec is a prompt actually typed by the human.

    True only for a non-meta "user" record whose message content is a
    non-blank string that does not start (after leading whitespace) with one
    of the markers for tool results / slash-command wrappers ("<"),
    interrupt notices ("[Request interrupted") or caveats ("Caveat:").
    """
    if rec.get("isMeta") or rec.get("type") != "user":
        return False

    body = rec.get("message", {}).get("content")
    if not isinstance(body, str):
        return False

    stripped = body.lstrip()
    if stripped == "":
        return False

    for marker in ("<", "[Request interrupted", "Caveat:"):
        if stripped.startswith(marker):
            return False
    return True
def resolve_file(args):
    """Turn the parsed CLI selector into the path of one session JSONL file.

    The selectors are tried in the order --file, --id, --title and the first
    one that is set wins:

    - args.file: returned after ``~`` expansion; existence is not checked.
    - args.id: the first ``<id>.jsonl`` found anywhere under PROJECTS_DIR.
    - args.title: every ``*.jsonl`` under PROJECTS_DIR is scanned for an
      "ai-title" record whose "aiTitle" contains the text
      (case-insensitive); the most recently modified matching file wins,
      and all matches are listed on stderr when there is more than one.

    Files that cannot be read or decoded are skipped rather than aborting
    the search.

    Raises:
        SystemExit: when no selector is set or nothing matches.
    """
    if args.file:
        return os.path.expanduser(args.file)

    if args.id:
        pattern = os.path.join(PROJECTS_DIR, "**", f"{args.id}.jsonl")
        matches = glob.glob(pattern, recursive=True)
        if not matches:
            sys.exit(f"No session file found for id {args.id} under {PROJECTS_DIR}")
        return matches[0]

    if args.title:
        needle = args.title.lower()
        candidates = []  # (mtime, path, title)
        pattern = os.path.join(PROJECTS_DIR, "**", "*.jsonl")
        for path in glob.glob(pattern, recursive=True):
            title = None
            try:
                with open(path, encoding="utf-8") as fh:
                    for line in fh:
                        # Cheap substring pre-filter so that most lines are
                        # never JSON-parsed.
                        if '"ai-title"' not in line:
                            continue
                        try:
                            rec = json.loads(line)
                        except json.JSONDecodeError:
                            continue
                        if not isinstance(rec, dict) or rec.get("type") != "ai-title":
                            continue
                        found = rec.get("aiTitle")
                        # "aiTitle" may be missing or null; only strings can match.
                        if isinstance(found, str) and needle in found.lower():
                            title = found
                            break
                if title is None:
                    continue
                mtime = os.path.getmtime(path)
            except (OSError, UnicodeDecodeError):
                # Unreadable, undecodable, or vanished file: skip it.
                continue
            candidates.append((mtime, path, title))

        if not candidates:
            sys.exit(f"No session whose ai-title contains {args.title!r}")
        candidates.sort(reverse=True)  # newest first
        if len(candidates) > 1:
            sys.stderr.write("Multiple matches (using newest):\n")
            for _, path, title in candidates:
                sys.stderr.write(f"  {path}\t{title}\n")
        return candidates[0][1]

    sys.exit("Provide one of --file, --id, or --title")
def _scan_session(path):
    """Read the session JSONL at path once and collect the summary signals.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped.

    Returns a dict with:
        turns: list of {"prompt": str, "reply": [text, ...]}, one per genuine
            user prompt, with the assistant text that followed it.
        edit_counts: file path -> number of edit-tool calls targeting it.
        last_assistant_text: text blocks of the last assistant record that
            contained any text.
        recent_tools: (tool name, short argument) for every tool call, in order.
        counts: record type -> number of records.
        first_ts / last_ts: first and last non-empty "timestamp" seen.
        cwd / branch: first truthy "cwd" / "gitBranch" seen.
    """
    turns = []
    edit_counts = {}
    last_assistant_text = []
    recent_tools = []
    counts = {}
    first_ts = last_ts = None
    cwd = branch = None

    with open(path, encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(rec, dict):
                continue  # valid JSON, but not a record we can inspect

            rtype = rec.get("type")
            counts[rtype] = counts.get(rtype, 0) + 1
            ts = rec.get("timestamp")
            if ts:
                first_ts = first_ts or ts
                last_ts = ts
            cwd = cwd or rec.get("cwd")
            branch = branch or rec.get("gitBranch")

            if is_genuine_prompt(rec):
                turns.append({"prompt": rec["message"]["content"], "reply": []})
            if rtype != "assistant":
                continue

            message = rec.get("message") or {}
            blocks = message.get("content", []) if isinstance(message, dict) else None
            if not isinstance(blocks, list):
                continue

            text_here = []
            for b in blocks:
                if not isinstance(b, dict):
                    continue
                btype = b.get("type")
                if btype == "text":
                    text = b.get("text")
                    if isinstance(text, str) and text.strip():
                        text_here.append(text)
                elif btype == "tool_use":
                    name = b.get("name", "?")
                    inp = b.get("input") or {}
                    if not isinstance(inp, dict):
                        inp = {}
                    if name in EDIT_TOOLS:
                        fp = inp.get(EDIT_TOOLS[name])
                        if fp:
                            edit_counts[fp] = edit_counts.get(fp, 0) + 1
                    # First populated field gives the most telling one-line summary.
                    arg = inp.get("file_path") or inp.get("command") or inp.get("description") or inp.get("path") or ""
                    recent_tools.append((name, truncate(str(arg), 300)))

            if text_here:
                last_assistant_text = text_here  # keep only the latest turn's text
                if turns:  # attach the agent's reply to the current turn
                    turns[-1]["reply"].extend(text_here)

    return {
        "turns": turns,
        "edit_counts": edit_counts,
        "last_assistant_text": last_assistant_text,
        "recent_tools": recent_tools,
        "counts": counts,
        "first_ts": first_ts,
        "last_ts": last_ts,
        "cwd": cwd,
        "branch": branch,
    }


def _pick_turn_indices(n, first, last, middle, seed):
    """Choose which of n turns to show as (first_idx, mid_idx, last_idx).

    The three index lists never overlap: the opening block is taken first,
    the closing block gets whatever of its range is left, and the middle is
    sampled only from the turns strictly between them. In a short session the
    overlapping part is simply dropped. first, last and middle must be >= 0.
    """
    first_idx = list(range(min(first, n)))
    last_start = max(len(first_idx), n - last)
    last_idx = list(range(last_start, n))
    mid_pool = list(range(len(first_idx), last_start))
    # A private generator yields the same sample as seeding the module-level
    # one, without changing global random state.
    rng = random.Random(seed)
    mid_idx = sorted(rng.sample(mid_pool, min(middle, len(mid_pool))))
    return first_idx, mid_idx, last_idx


def main():
    """Parse the command line, scan one session log, and print its summary.

    The report is written to stdout as a series of XML-tagged sections:
    header metadata, the first / sampled-middle / last exchanges, the
    most-edited files, the last eight tool calls, and the final agent
    message. Negative values for the count and length options are treated
    as zero.
    """
    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    g = ap.add_mutually_exclusive_group(required=True)
    g.add_argument("--file", help="path to a session .jsonl")
    g.add_argument("--id", help="session UUID")
    g.add_argument("--title", help="substring of the session's ai-title")
    ap.add_argument("--first", type=int, default=5, help="how many opening exchanges")
    ap.add_argument("--last", type=int, default=10, help="how many closing exchanges")
    ap.add_argument("--middle", type=int, default=5, help="how many exchanges randomly sampled from the middle")
    ap.add_argument("--top-files", type=int, default=10, help="how many most-edited files")
    ap.add_argument("--maxlen", type=int, default=800, help="max chars per quoted message")
    ap.add_argument("--seed", type=int, default=1, help="seed for the middle random sample")
    args = ap.parse_args()

    # Negative values would otherwise produce out-of-range turn indices
    # (--last), make random.sample raise (--middle), or slice from the wrong
    # end (--top-files, --maxlen).
    first_n = max(args.first, 0)
    last_n = max(args.last, 0)
    middle_n = max(args.middle, 0)
    top_n = max(args.top_files, 0)
    maxlen = max(args.maxlen, 0)

    path = resolve_file(args)
    session = _scan_session(path)
    turns = session["turns"]
    edit_counts = session["edit_counts"]
    counts = session["counts"]
    cwd = session["cwd"]
    branch = session["branch"]
    first_ts = session["first_ts"]
    last_ts = session["last_ts"]

    first_idx, mid_idx, last_idx = _pick_turn_indices(len(turns), first_n, last_n, middle_n, args.seed)

    # ---- emit a bounded, XML-tagged report (newlines preserved) ----
    out = sys.stdout.write
    # Sort on the stringified key: a record without "type" is counted under
    # None, which cannot be ordered against string keys.
    records = ", ".join(f"{k}={v}" for k, v in sorted(counts.items(), key=lambda kv: str(kv[0])))
    out("<session-summary>\n")
    out(f" <file>{esc(path)}</file>\n")
    out(f" <project>{esc(cwd)}</project>\n")
    out(f" <git-branch>{esc(branch)}</git-branch>\n")
    out(f' <time-span start="{esc(first_ts)}" end="{esc(last_ts)}" />\n')
    out(f" <records>{esc(records)}</records>\n")
    out(f" <user-prompts count=\"{len(turns)}\" />\n")
    out("</session-summary>\n")

    def emit_exchanges(label, indices):
        """Write one <exchanges> section; nothing is written for an empty list."""
        if not indices:
            return
        out(f'\n<exchanges section="{label}">\n')
        for i in indices:
            t = turns[i]
            reply = "\n".join(t["reply"]).strip()
            out(f' <exchange n="{i + 1}">\n')
            out(f" <user>\n{clip(t['prompt'], maxlen)}\n </user>\n")
            if reply:
                out(f" <agent>\n{clip(reply, maxlen)}\n </agent>\n")
            else:
                out(" <agent note=\"no text reply — tool calls only\" />\n")
            out(" </exchange>\n")
        out("</exchanges>\n")

    emit_exchanges("first", first_idx)
    emit_exchanges("sampled-middle", mid_idx)
    emit_exchanges("last", last_idx)

    out(f'\n<most-edited-files top="{top_n}">\n')
    # sorted() is stable, so files with equal counts keep first-seen order.
    for fp, c in sorted(edit_counts.items(), key=lambda kv: -kv[1])[:top_n]:
        out(f' <file edits="{c}">{esc(fp)}</file>\n')
    out("</most-edited-files>\n")

    out("\n<last-tool-calls>\n")
    for name, arg in session["recent_tools"][-8:]:
        out(f' <call tool="{esc(name)}">{esc(arg)}</call>\n')
    out("</last-tool-calls>\n")

    out("\n<final-agent-message>\n")
    out(clip("\n".join(session["last_assistant_text"]), 2000) + "\n")
    out("</final-agent-message>\n")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()