# Stakeholder Analysis - 03

In [None]:
# === 03) Stakeholder Analysis — Load → LLM Score (Lo/Med/Hi) → Human Review → Save (no plots) ===
# Requires: pandas, gradio, openai   (pip install pandas gradio openai)

import os, json, re
from pathlib import Path
from datetime import datetime
import pandas as pd
import gradio as gr

# ---------- Project ROOT ----------
def find_project_root(max_up=8):
    """Locate the project root by walking upward from the current directory.

    A directory qualifies when it contains ``configs`` together with either
    ``src`` or ``notebooks``. At most ``max_up`` directories are inspected,
    starting with the current working directory itself.

    Returns:
        The first qualifying directory, or the current working directory when
        none is found within ``max_up`` levels.
    """
    candidate = Path.cwd()
    remaining = max_up
    while remaining > 0:
        if (candidate / "configs").exists():
            # "configs" alone is not enough; a code folder must sit beside it.
            if (candidate / "src").exists() or (candidate / "notebooks").exists():
                return candidate
        candidate = candidate.parent
        remaining -= 1
    return Path.cwd()

# Resolved once when this cell runs; the output and config paths used below are built from it.
ROOT = find_project_root()

# ---------- Helpers ----------
# Allowed categorical rating labels.
RATING = ["Lo", "Med", "Hi"]

# Canonical column order of the stakeholder table (editing, scoring and saving).
COLUMNS = [
    # descriptive text fields
    "stakeholder_type",
    "entity",
    "category",
    "justification",
    "source_ref",
    # categorical ratings (values drawn from RATING)
    "power",
    "interest",
    "threat",
    "collab",
    # review bookkeeping
    "delete",
    "notes",
    "confidence",
]

def slugify(s: str) -> str:
    """Convert free text into a lowercase, dash-separated slug.

    Surrounding whitespace is trimmed, the text is lowercased, every run of
    characters other than ASCII letters and digits becomes a single ``-``,
    and leading/trailing dashes are removed. Empty or ``None`` input (or
    input with no usable characters) yields ``"unknown"``.
    """
    text = (s or "").strip().lower()
    dashed = re.sub(r"[^0-9a-zA-Z]+", "-", text)
    slug = dashed.strip("-")
    if slug:
        return slug
    return "unknown"

def state_output_dir(state: str) -> Path:
    """Return the output directory for ``state`` under ``ROOT / "outputs"``.

    Two candidate locations are considered:

    * primary - the slug of the full state name (e.g. ``kaduna-state``);
    * legacy  - the slug of the name with ``" state"`` removed (e.g. ``kaduna``).

    An existing primary directory wins, then an existing legacy directory.
    When neither exists the primary directory is created and returned.

    Args:
        state: Human-readable state/location name; ``None`` or empty maps to
            the ``unknown`` slug.

    Returns:
        Path of the directory to read from / write to.
    """
    s = (state or "").strip()
    primary = ROOT / "outputs" / slugify(s)
    # Remove " state" case-insensitively: slugify() lowercases its input, so a
    # case-sensitive replace left "Kaduna State" untouched and made the legacy
    # path identical to the primary one (the fallback could never trigger).
    legacy_name = re.sub(r" state", "", s, flags=re.IGNORECASE)
    legacy = ROOT / "outputs" / slugify(legacy_name)
    if primary.exists():
        return primary
    if legacy.exists():
        return legacy
    primary.mkdir(parents=True, exist_ok=True)
    return primary

def load_latest_pea_row_any_state() -> dict | None:
    """
    Return the most recent PEA summary row found under outputs/*/pea_summaries.csv.

    Each readable, non-empty CSV contributes its last row after sorting by
    ``timestamp_utc`` (when that column exists; rows without a timestamp are
    sorted first so they never shadow timestamped rows). Across files the row
    with the greatest ``(timestamp, file path)`` pair wins, so a tie on the
    timestamp is broken by path and the result does not depend on directory
    listing order.

    Timestamps are compared as strings.
    NOTE(review): this assumes ISO-8601-like values that sort lexicographically - confirm.

    Returns:
        A dict with string values for ``subject``, ``state``, ``issue`` (read
        from the ``issue_focus`` column) and ``summary``; missing or empty
        cells become "". ``None`` when no usable file exists.
    """
    def _text(value) -> str:
        # pandas reports empty CSV cells as NaN; present those as "".
        return "" if pd.isna(value) else str(value)

    newest = None
    newest_row = None
    for p in (ROOT / "outputs").glob("*/pea_summaries.csv"):
        try:
            df = pd.read_csv(p)
            if df.empty:
                continue
            if "timestamp_utc" in df.columns:
                df = df.sort_values("timestamp_utc", na_position="first")
            r = df.iloc[-1].to_dict()
        except Exception:
            # Best effort: an unreadable or malformed file must not block the others.
            continue
        # Always compare strings; a raw NaN timestamp (float) compared with a
        # string timestamp used to raise and silently discard the newer file.
        key = (_text(r.get("timestamp_utc")), str(p))
        if newest is None or key > newest:
            newest = key
            newest_row = {
                "subject": _text(r.get("subject", "")),
                "state": _text(r.get("state", "")),
                "issue": _text(r.get("issue_focus", "")),
                "summary": _text(r.get("summary", "")),
            }
    return newest_row

def ensure_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` coerced to the canonical stakeholder schema.

    * Text columns are created when missing and converted to ``str`` with
      missing values as "".
    * Rating columns (power/interest/threat/collab) are normalised to one of
      ``RATING``. Labels are matched ignoring case and surrounding whitespace;
      numeric scores (numbers or numeric strings) map 1-2 -> Lo, 3-4 -> Med,
      5 -> Hi. Anything else becomes "Lo".
    * ``delete`` becomes an int (default 0) and ``confidence`` a float
      (default 0.6).

    Returns:
        A new DataFrame containing exactly ``COLUMNS`` in that order.
    """
    numeric_to_rating = {1: "Lo", 2: "Lo", 3: "Med", 4: "Med", 5: "Hi"}

    def _as_rating(value):
        # Cells edited by hand arrive as strings, so "hi", " Med" or "5" must
        # be recognised instead of being silently reset to "Lo".
        if isinstance(value, str):
            label = value.strip().title()
            if label in RATING:
                return label
            try:
                value = float(label)
            except ValueError:
                return "Lo"
        try:
            # 3 and 3.0 hash alike, so float scores hit the integer keys.
            return numeric_to_rating.get(value, "Lo")
        except TypeError:
            # Unhashable cell content cannot be a rating.
            return "Lo"

    df = df.copy()
    for c in ["stakeholder_type", "entity", "category", "justification", "source_ref", "notes"]:
        if c not in df.columns:
            df[c] = ""
        df[c] = df[c].fillna("").astype(str)
    for c in ["power", "interest", "threat", "collab"]:
        if c not in df.columns:
            df[c] = ""
        df[c] = df[c].map(_as_rating)
    if "delete" not in df.columns:
        df["delete"] = 0
    df["delete"] = pd.to_numeric(df["delete"], errors="coerce").fillna(0).astype(int)
    if "confidence" not in df.columns:
        df["confidence"] = 0.6
    df["confidence"] = pd.to_numeric(df["confidence"], errors="coerce").fillna(0.6)
    return df[COLUMNS]

def blank_df(n=20) -> pd.DataFrame:
    """Build an ``n``-row placeholder table in the canonical column order.

    Text fields are empty strings, every rating is "Lo", ``delete`` is 0 and
    ``confidence`` is 0.6.
    """
    template = dict.fromkeys(
        ("stakeholder_type", "entity", "category", "justification", "source_ref", "notes"),
        "",
    )
    template.update(dict.fromkeys(("power", "interest", "threat", "collab"), "Lo"))
    template["delete"] = 0
    template["confidence"] = 0.6
    # columns=COLUMNS fixes the column order regardless of dict insertion order.
    return pd.DataFrame([template for _ in range(n)], columns=COLUMNS)

# ---------- Load the most recent Subject/State/Issue (READ-ONLY) ----------
# Context of the most recent PEA run; falls back to a fixed example when no
# summary file could be loaded.
LATEST = load_latest_pea_row_any_state()
if not LATEST:
    LATEST = {
        "subject": "Climate Governance",
        "state": "Kaduna State",
        "issue": "Flood Control",
        "summary": "",
    }

# Read-only values shown in the UI header.
SUBJECT_RO, STATE_RO, ISSUE_RO = (LATEST[field] for field in ("subject", "state", "issue"))

# ---------- Load stakeholders list (from Identification step) ----------
def load_stakeholders_for_state(state: str) -> tuple[pd.DataFrame, str]:
    """Load the stakeholder list for ``state`` from its output directory.

    The curated ``pea_summaries_stakeholders.csv`` is preferred; when it is
    absent ``stakeholders_llm_candidates.csv`` is used instead. The loaded
    table is passed through ``ensure_columns``, which returns only the
    canonical stakeholder columns, so any extra context columns stored by the
    identification step are dropped there.

    Returns:
        ``(table, status_message)``. When no file exists or loading fails, the
        table is a 20-row blank table and the message explains why.
    """
    out_dir = state_output_dir(state)
    candidates = (
        out_dir / "pea_summaries_stakeholders.csv",   # curated identification file
        out_dir / "stakeholders_llm_candidates.csv",  # raw LLM candidates
    )
    src = next((p for p in candidates if p.exists()), None)
    if src is None:
        return blank_df(20), f"ℹ️ No stakeholder file found in {out_dir}. Starting blank."
    try:
        df = ensure_columns(pd.read_csv(src))
    except Exception as e:
        # Boundary with user-supplied files: report the problem instead of crashing the UI.
        return blank_df(20), f"⚠️ Failed to load {src}: {e}. Starting blank."
    return df, f"✅ Loaded {len(df)} rows from {src}"

# ---------- Save scored list (overwrite + snapshot) ----------
def save_scored_for_state(state: str, df: pd.DataFrame) -> str:
    """Write the reviewed stakeholder scores for ``state`` to disk.

    The table is normalised with ``ensure_columns``; rows with an empty
    ``entity`` are ignored and rows with ``delete == 1`` are dropped. Two CSV
    files are written into the state's output directory:

    * ``pea_stakeholder_scores.csv`` - overwritten on every save;
    * ``pea_stakeholder_scores_<UTC timestamp>Z.csv`` - a per-save snapshot
      (second resolution, ``YYYY-MM-DDTHH-MM-SS``).

    Returns:
        A status message. Nothing is written when no row has an entity.
    """
    # Local import: the file header only imports the `datetime` class.
    from datetime import timezone

    out_dir = state_output_dir(state)
    df = ensure_columns(df)
    # require at least one real entity
    real = df[df["entity"].astype(str).str.strip() != ""].copy()
    if real.empty:
        return "⚠️ Not saved: no non-empty entities."
    # drop rows flagged for deletion
    real = real[real["delete"] != 1].copy()

    # datetime.utcnow() is deprecated; an aware UTC "now" formatted explicitly
    # yields the same file-name stamp as before.
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S")
    latest = out_dir / "pea_stakeholder_scores.csv"  # overwrite "current"
    snap = out_dir / f"pea_stakeholder_scores_{stamp}Z.csv"
    real.to_csv(latest, index=False)
    real.to_csv(snap, index=False)
    return f"💾 Saved {len(real)} rows.\n- Latest: {latest}\n- Snapshot: {snap}"

# ---------- OpenAI client ----------
def _openai_client_or_none():
    """Create an OpenAI client from environment variables and configs/config.json.

    Environment variables take precedence over the config file for the API
    key (``OPENAI_API_KEY`` / ``OPEN_API_KEY``) and the base URL
    (``OPENAI_BASE_URL``; the config may also supply ``ENVIRONMENT_URL``).
    The model name comes from the config key ``MODEL`` and defaults to
    ``gpt-4o-mini``.

    Returns:
        ``(client, model, None)`` on success, otherwise
        ``(None, None, reason)``.
    """
    settings = {}
    config_file = ROOT / "configs" / "config.json"
    if config_file.exists():
        try:
            settings = json.loads(config_file.read_text(encoding="utf-8")) or {}
        except Exception:
            # An unreadable config is treated the same as a missing one.
            settings = {}

    env_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPEN_API_KEY")
    api_key = env_key or settings.get("OPENAI_API_KEY") or settings.get("OPEN_API_KEY")
    env_base = os.getenv("OPENAI_BASE_URL")
    base_url = env_base or settings.get("ENVIRONMENT_URL") or settings.get("OPENAI_BASE_URL")
    model_name = settings.get("MODEL", "gpt-4o-mini")

    if not api_key:
        return None, None, "No API key found; manual editing only (OK)."

    try:
        # Imported lazily so the rest of the cell works without the package.
        from openai import OpenAI
        client_kwargs = {"api_key": api_key}
        if base_url:
            client_kwargs["base_url"] = base_url
        client = OpenAI(**client_kwargs)
    except Exception as e:
        return None, None, f"OpenAI init failed: {e}"
    return client, model_name, None

# ---------- Guardrailed prompts ----------
# System message for the scoring request: ethical guardrails, the fixed
# Governor/Public rules, and the required JSONL reply format.
# (Mis-encoded symbols in the prompt text were restored: ∈, ⇒, curly quotes, en dash.)
SYS_PROMPT = """You are a Political Economy Analyst producing comparative, non-personal stakeholder assessments.
Ethical guardrails:
- Never disclose or infer personal data about private individuals (no addresses, phones, private emails, health, etc.).
- Use only public institutional info and high-level roles/titles; cite uncertainty when appropriate.
- Output categorical ratings only: Power, Interest, Potential for Threat, Potential for Collaboration ∈ {Lo, Med, Hi}.
Special rules:
- A state Governor (or equivalent) ⇒ Power=Hi.
- “Public/General public” ⇒ Power=Lo.
Return ONLY JSONL with one object per input row:
{entity, power, interest, threat, collab, confidence (0–1), notes (<=120 chars)}.
"""

def _parse_llm_scores(txt: str) -> dict:
    """Turn the model's JSONL reply into ``{lowercased entity: column updates}``."""
    updates = {}
    for ln in (txt or "").splitlines():
        # tolerate bullets / numbering in front and a trailing comma behind
        ln = re.sub(r'^[\-\*\d\.\)\s]+', '', ln.strip()).rstrip(",")
        if not ln:
            continue
        try:
            parsed = json.loads(ln)
        except ValueError:
            continue  # code fences, prose, partial JSON
        # A line may hold a whole JSON array; anything that is not an object
        # (strings, numbers) is ignored instead of crashing on `.get`.
        for obj in (parsed if isinstance(parsed, list) else [parsed]):
            if not isinstance(obj, dict):
                continue
            ent = str(obj.get("entity") or "").strip()
            if not ent:
                continue
            r = {}
            for k in ("power", "interest", "threat", "collab"):
                # str() first: the model may answer with a number instead of a label.
                label = str(obj.get(k) or "").strip().title()
                r[k] = label if label in RATING else "Med"
            r["notes"] = str(obj.get("notes") or "")
            try:
                conf = float(obj.get("confidence", ""))
            except (TypeError, ValueError, OverflowError):
                conf = 0.6
            if not (0.0 <= conf <= 1.0):  # also rejects NaN
                conf = 0.6
            r["confidence"] = conf
            updates[ent.lower()] = r
    return updates


def llm_score(subject: str, state: str, issue: str, df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
    """Ask the LLM for Lo/Med/Hi ratings and merge them into the table.

    Rows with a non-empty ``entity`` are sent as JSONL together with the
    subject/state/issue context. The reply is parsed line by line; each
    returned entity is matched to table rows by its trimmed, lowercased name
    and overwrites ``power``, ``interest``, ``threat``, ``collab``, ``notes``
    and ``confidence`` on those rows. Unrecognised ratings become "Med" and
    invalid confidences 0.6. Finally the fixed rules are enforced: an entity
    named exactly "governor" gets power "Hi"; "public" / "general public"
    gets power "Lo".

    Args:
        subject: Subject of the analysis (prompt context).
        state: State/location name (prompt context).
        issue: Issue focus (prompt context).
        df: Stakeholder table; must contain the identification columns
            (stakeholder_type, entity, category, justification, source_ref).

    Returns:
        ``(table, status_message)``. The input table is returned unchanged
        when no client is available, nothing can be scored, the request
        fails, or the reply contains no usable rows.
    """
    client, model, err = _openai_client_or_none()
    if client is None:
        return df, f"ℹ️ LLM unavailable: {err}"

    rows = df.fillna("")
    rows = rows[rows["entity"].astype(str).str.strip() != ""].copy()
    if rows.empty:
        return df, "ℹ️ No non-empty entities to score."

    user_intro = (
        f"Subject: {subject}\nState: {state}\nIssue focus: {issue}\n"
        "Assign Lo/Med/Hi for power, interest, threat, collab for each input row.\n"
        "If strong corroboration is missing, select Med and add a short note.\n"
        "Input rows (JSONL) with fields: stakeholder_type, entity, category, justification, source_ref:\n"
    )
    id_columns = ["stakeholder_type", "entity", "category", "justification", "source_ref"]
    jsonl_in = "\n".join(
        json.dumps(record, ensure_ascii=False)
        for record in rows[id_columns].to_dict(orient="records")
    )

    try:
        resp = client.chat.completions.create(
            model=model,
            temperature=0.2,
            messages=[
                {"role": "system", "content": SYS_PROMPT},
                {"role": "user", "content": user_intro + "\n" + jsonl_in},
            ],
        )
        txt = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        # Service boundary: surface the failure in the status line.
        return df, f"⚠️ LLM call failed: {e}"

    updates = _parse_llm_scores(txt)
    if not updates:
        return df, "⚠️ No usable scoring rows returned. Table unchanged."

    out = df.copy()
    entity_keys = out["entity"].astype(str).str.strip().str.lower()
    matched = 0
    for ent_key, upd in updates.items():
        # Positional boolean mask: independent of the index labels, so a
        # filtered or re-indexed table is updated on the right rows.
        hit = (entity_keys == ent_key).to_numpy()
        if not hit.any():
            continue
        for k, v in upd.items():
            out.loc[hit, k] = v
        matched += int(hit.sum())

    # Apply fixed rules (Governor / Public) regardless of what the model said.
    out.loc[entity_keys.eq("governor").to_numpy(), "power"] = "Hi"
    out.loc[entity_keys.isin(["public", "general public"]).to_numpy(), "power"] = "Lo"

    # Report rows actually updated out of the rows that were sent for scoring.
    return out, f"✅ Scored {matched} of {len(rows)} rows."

# ---------- Gradio UI ----------
# Page title and heading use a real em dash (the mis-encoded sequence was shown verbatim in the browser).
with gr.Blocks(title="Stakeholder Analysis — Score & Review (Lo/Med/Hi)") as demo:
    gr.Markdown("## Stakeholder Analysis — Score & Review (Lo/Med/Hi)")

    # Read-only Subject / State / Issue from latest pea_summaries.csv
    with gr.Row():
        subject_tb = gr.Textbox(label="Subject (from latest)", value=SUBJECT_RO, interactive=False)
        state_tb   = gr.Textbox(label="State/Location (from latest)", value=STATE_RO, interactive=False)
        issue_tb   = gr.Textbox(label="Issue Focus (from latest)", value=ISSUE_RO, interactive=False)

    # Action buttons: load -> auto-score -> save
    with gr.Row():
        load_btn  = gr.Button("Load Stakeholders", variant="secondary")
        score_btn = gr.Button("Auto-Score with LLM", variant="secondary")
        save_btn  = gr.Button("Save Scores", variant="primary")

    # Editable stakeholder table; one datatype entry per name in COLUMNS.
    table = gr.Dataframe(
        headers=COLUMNS,
        datatype=["str","str","str","str","str","str","str","str","str","number","str","number"],
        interactive=True,
        row_count=(20, "dynamic"),
        label="Edit here. Set delete=1 to drop a row before saving."
    )
    status_md = gr.Markdown("")

    # --- handlers ---
    def _on_load(state):
        """Fill the table from the state's stakeholder file and report the outcome."""
        df, msg = load_stakeholders_for_state(state)
        return df, msg

    def _on_score(subject, state, issue, df):
        """Normalise the edited table, then let the LLM propose ratings."""
        df = pd.DataFrame(df, columns=COLUMNS)
        df = ensure_columns(df)
        scored, msg = llm_score(subject, state, issue, df)
        return scored, msg

    def _on_save(state, df):
        """Persist the reviewed table and return the status message."""
        df = pd.DataFrame(df, columns=COLUMNS)
        msg = save_scored_for_state(state, df)
        return msg

    load_btn.click(_on_load, inputs=[state_tb], outputs=[table, status_md])
    score_btn.click(_on_score, inputs=[subject_tb, state_tb, issue_tb, table], outputs=[table, status_md])
    save_btn.click(_on_save, inputs=[state_tb, table], outputs=[status_md])

# If localhost is blocked in your environment, switch to share=True
demo.launch(inline=True, share=False)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




  snap   = out_dir / f"pea_stakeholder_scores_{datetime.utcnow().isoformat(timespec='seconds').replace(':','-')}Z.csv"
  snap   = out_dir / f"pea_stakeholder_scores_{datetime.utcnow().isoformat(timespec='seconds').replace(':','-')}Z.csv"
