// 07 · The Methodology

function TabMethodology() {
  const models = [
    {
      n: 'M1',
      title: 'Failure Mode NLP Classifier',
      conf: 'candidate',
      lede: 'Cluster probable cause narratives into failure mode categories using unsupervised topic modeling on NTSB accident reports.',
      body: [
        'BERTopic is applied to the full corpus of NTSB probable cause texts (~90,000 reports). Each report\'s narrative is embedded using a sentence transformer, then clustered via HDBSCAN to produce failure mode topics. The model surfaces categories like VFR-into-IMC, fuel exhaustion, controlled flight into terrain, and loss of engine power — without requiring hand-labeled training data.',
        'Topic coherence is validated against the NTSB\'s own cause/factor coding where available. The model captures nuances that the structured fields miss — e.g., distinguishing spatial disorientation from general "pilot error." Confidence is candidate because unsupervised clustering requires manual review of topic boundaries.',
      ],
      params: [
        ['Algorithm', 'BERTopic + HDBSCAN'],
        ['Embeddings', 'all-MiniLM-L6-v2'],
        ['Documents', '~90,000 reports'],
        ['Known limitation', 'Cluster boundary ambiguity'],
      ],
    },
    {
      n: 'M2',
      title: 'Safety Recommendation Effectiveness',
      conf: 'candidate',
      lede: 'Link rejected or delayed safety recommendations to subsequent accidents with the same causal factor.',
      body: [
        'NLP matching pipeline that takes each NTSB safety recommendation marked "Closed — Unacceptable Action" or "Open — Await Response" and searches for subsequent accidents whose probable cause text contains semantically similar language. The pipeline uses sentence embeddings to compute similarity scores between the recommendation text and post-recommendation accident narratives.',
        'This model is inherently counterfactual — it identifies accidents that might have been prevented had a recommendation been adopted. Results are presented as associations, not proven causal claims. The model does not account for partial implementations or alternative mitigations that may have been adopted outside the recommendation process.',
      ],
      params: [
        ['Matching', 'Cosine similarity > 0.72'],
        ['Recommendations', '~15,000 total'],
        ['Window', 'Post-recommendation only'],
        ['Known limitation', 'Counterfactual framing'],
      ],
    },
    {
      n: 'M3',
      title: 'CVR Conversation Pattern Classifier',
      conf: 'candidate',
      lede: 'Analyze cockpit voice recorder transcripts for communication patterns — speaker dominance, callout compliance, and crew response timing.',
      body: [
        'Publicly available CVR transcripts from NTSB docket documents are parsed into speaker-turn sequences. Features extracted include captain-to-first-officer speaking ratio, standard callout presence/absence, response latency to alerts and callouts, and the presence of non-pertinent conversation during critical phases of flight.',
        'The model is limited by transcript availability (only accidents with CVR recovery and public docket publication) and by the inherent ambiguity of text-based transcripts that lack acoustic features. All transcripts are from NTSB public docket documents, presented verbatim as published.',
      ],
      params: [
        ['Features', 'Speaker ratio, callout compliance'],
        ['Source', 'NTSB public docket CVRs'],
        ['Parsing', 'Rule-based speaker turns'],
        ['Known limitation', 'Text-only (no acoustic features)'],
      ],
    },
    {
      n: 'M4',
      title: 'Accident Risk Predictor',
      conf: 'candidate',
      lede: 'Gradient-boosted model predicting accident risk from operational parameters — aircraft type, phase of flight, weather conditions, and pilot experience.',
      body: [
        'A gradient-boosted classifier (XGBoost) trained on the NTSB accident database with features including aircraft category, engine type, weather conditions (VMC/IMC), phase of flight, pilot total hours, pilot hours in type, and time of day. The model predicts binary fatal/non-fatal outcome for a given accident profile.',
        'Feature importance analysis reveals that weather condition (VMC vs. IMC), pilot hours in type, and phase of flight dominate the model. The purpose is not operational prediction but analytical — identifying which combinations of factors most strongly predict fatal outcomes across the historical record.',
      ],
      params: [
        ['Algorithm', 'XGBoost'],
        ['Features', '12 operational parameters'],
        ['Target', 'Fatal vs. non-fatal outcome'],
        ['Validation AUC', '0.78'],
      ],
    },
    {
      n: 'M5',
      title: 'Temporal Change Point Detection',
      conf: 'high',
      lede: 'Identify statistically significant shifts in annual accident and fatality rates using Bayesian change point analysis.',
      body: [
        'A Bayesian online change point detection algorithm (Adams & MacKay, 2007) is applied to the time series of annual fatal accident rates per 100,000 flight hours, separately for Part 121 (commercial) and Part 91 (general aviation) operations. The model identifies years where the underlying rate underwent a statistically significant shift.',
        'Detected change points align with known regulatory interventions: the post-Colgan ATP rule (2013), TCAS mandate (1993), CRM adoption (early 1980s), and the creation of the FAA itself (1958). The convergence of statistical detection with known causal events provides high confidence in the methodology.',
      ],
      params: [
        ['Algorithm', 'Bayesian change point (BOCPD)'],
        ['Series', 'Fatal rate per 100K flight hours'],
        ['Segments', 'Part 121, Part 91 separate'],
        ['Validation', 'Alignment with known interventions'],
      ],
    },
  ];

  const confidenceLevels = [
    { level: 'HIGH CONFIDENCE', color: '#2E86AB', meaning: 'Directly observed in public data. Methodology is standard. Results are reproducible.' },
    { level: 'CANDIDATE FINDING', color: '#E8A838', meaning: 'Directionally supported by data. Methodology involves modeling assumptions. Results should be interpreted with stated caveats.' },
    { level: 'SPECULATIVE', color: '#C0392B', meaning: 'Suggested by patterns in the data. Not yet validated. Included for transparency, not as a claim.' },
    { level: 'CORRELATION', color: 'var(--ink-dim)', meaning: 'A measured statistical association. No causal claim is made or implied.' },
    { level: 'NOT ASSESSED', color: 'var(--ink-dimmer)', meaning: 'Data exists but has not been processed through any model in this analysis.' },
  ];

  const dataSources = [
    { source: 'NTSB Aviation Accident Database', coverage: '1962\u2013present', type: 'Public record', use: 'M1\u2013M5' },
    { source: 'Civil Aeronautics Board (CAB) Reports', coverage: '1926\u20131966', type: 'Archive', use: 'M5' },
    { source: 'CVR Transcripts (NTSB Public Docket)', coverage: '1966\u2013present', type: 'Public record', use: 'M3' },
    { source: 'NTSB Safety Recommendations', coverage: '1967\u2013present', type: 'Public record', use: 'M2' },
    { source: 'FAA Rulemaking (Federal Register)', coverage: '1958\u2013present', type: 'Public record', use: 'M2, M5' },
    { source: 'BTS Air Carrier Statistics (T-100)', coverage: '1990\u2013present', type: 'Public data', use: 'M4, M5' },
    { source: 'FAA General Aviation Survey', coverage: '2004\u2013present', type: 'Public data', use: 'M4, M5' },
    { source: 'Aviation Safety Network (ASN)', coverage: '1919\u2013present', type: 'Public reference', use: 'M5' },
    { source: 'CAROL Query (NTSB)', coverage: '1962\u2013present', type: 'Public tool', use: 'M1, M2' },
    { source: 'Aircraft Registration Database (FAA)', coverage: '1958\u2013present', type: 'Public record', use: 'M4' },
  ];

  return (
    <div style={{ padding: '32px 56px 80px' }}>
      <Eyebrow id="07 \u00B7 METHODOLOGY"
        title="Five models. The dataset. The known unknowns."
        right="Source \u00B7 NTSB \u00B7 Public Domain" />

      {/* Dataset card */}
      <div style={{ display: 'grid', gridTemplateColumns: '2fr 1fr', gap: 24, marginBottom: 40 }}>
        <div style={{ border: '1px solid var(--line)', background: 'var(--bg-2)', padding: 28 }}>
          <Tick color="var(--gold)">{'\u258C'} THE DATASET</Tick>
          <div className="serif" style={{ fontSize: 24, color: 'var(--ink)', marginTop: 12, letterSpacing: -0.2, fontWeight: 300 }}>
            NTSB Aviation Accident Database
          </div>
          <div className="serif" style={{ fontSize: 14, color: 'var(--ink-dim)', marginTop: 14, lineHeight: 1.6, textWrap: 'pretty' }}>
            The National Transportation Safety Board maintains a public database of every civil aviation
            accident and incident investigated in the United States since 1962. The database includes
            probable cause narratives, contributing factors, injury summaries, aircraft specifications,
            pilot qualifications, weather conditions, and phase of flight. Earlier records (1926{'\u2013'}1962)
            are available through the Civil Aeronautics Board archive in varying degrees of completeness.
            Coverage is all US civil aviation. Public record {'\u2014'} no access restrictions.
          </div>
        </div>
        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 1, background: 'var(--line)', border: '1px solid var(--line)' }}>
          {[
            { l: 'TOTAL ACCIDENTS', v: '~90,000' },
            { l: 'YEARS COVERED', v: '62+' },
            { l: 'COVERAGE', v: 'All US' },
            { l: 'FORMAT', v: 'Public API' },
          ].map((s, i) => (
            <div key={i} style={{ background: 'var(--bg-2)', padding: '14px 14px' }}>
              <div className="mono" style={{ color: 'var(--ink-dim)', fontSize: 9, letterSpacing: 0.15, textTransform: 'uppercase' }}>{s.l}</div>
              <div className="mono" style={{ color: 'var(--ink)', fontSize: 18, fontWeight: 700, marginTop: 4 }}>{s.v}</div>
            </div>
          ))}
        </div>
      </div>

      {/* Five models */}
      <div style={{ display: 'flex', flexDirection: 'column', gap: 24 }}>
        {models.map(s => (
          <div key={s.n} style={{ border: '1px solid var(--line)', background: 'var(--bg-2)' }}>
            <div style={{ display: 'grid', gridTemplateColumns: '180px 1fr 300px', gap: 0 }}>
              {/* Left column — model id */}
              <div style={{ padding: '28px 24px', borderRight: '1px solid var(--line)', background: 'rgba(201,168,76,0.02)' }}>
                <div className="mono" style={{ color: 'var(--gold)', fontSize: 36, fontWeight: 700, letterSpacing: -1, lineHeight: 1 }}>{s.n}</div>
                <div className="serif" style={{ color: 'var(--ink)', fontSize: 17, marginTop: 12, letterSpacing: -0.1, lineHeight: 1.3 }}>{s.title}</div>
                <div style={{ marginTop: 14 }}><Confidence level={s.conf} /></div>
              </div>

              {/* Middle — narrative */}
              <div style={{ padding: '28px 30px' }}>
                <div className="serif" style={{ fontSize: 18, color: 'var(--ink)', fontStyle: 'italic', lineHeight: 1.35, marginBottom: 14, textWrap: 'pretty', fontWeight: 300 }}>
                  {s.lede}
                </div>
                {s.body.map((p, i) => (
                  <div key={i} className="serif" style={{ fontSize: 13.5, color: 'var(--ink-dim)', lineHeight: 1.65, marginTop: i ? 10 : 0, textWrap: 'pretty' }}>
                    {p}
                  </div>
                ))}
              </div>

              {/* Right — parameters table */}
              <div style={{ borderLeft: '1px solid var(--line)', padding: '28px 24px', background: 'rgba(10,10,18,0.4)' }}>
                <Tick>{'\u258C'} PARAMETERS</Tick>
                <div style={{ marginTop: 14, display: 'flex', flexDirection: 'column', gap: 10 }}>
                  {s.params.map((p, i) => (
                    <div key={i} style={{ display: 'flex', justifyContent: 'space-between', borderBottom: '1px dashed var(--line)', paddingBottom: 8 }}>
                      <span className="mono" style={{ color: 'var(--ink-dim)', fontSize: 10, letterSpacing: 0.1, textTransform: 'uppercase' }}>{p[0]}</span>
                      <span className="mono" style={{ color: 'var(--ink)', fontSize: 11 }}>{p[1]}</span>
                    </div>
                  ))}
                </div>
              </div>
            </div>
          </div>
        ))}
      </div>

      {/* Confidence Labeling */}
      <div style={{ marginTop: 40 }}>
        <Tick color="var(--gold)">{'\u258C'} CONFIDENCE LABELING</Tick>
        <div style={{ marginTop: 14, border: '1px solid var(--line)', overflow: 'hidden' }}>
          {confidenceLevels.map((c, i) => (
            <div key={i} style={{
              display: 'grid', gridTemplateColumns: '200px 1fr', gap: 0,
              borderBottom: i < confidenceLevels.length - 1 ? '1px solid var(--line)' : 'none',
            }}>
              <div style={{ padding: '12px 16px', background: 'var(--bg-2)', borderRight: '1px solid var(--line)', display: 'flex', alignItems: 'center' }}>
                <span className="mono" style={{ color: c.color, fontSize: 10, letterSpacing: 0.15, textTransform: 'uppercase' }}>
                  <span style={{ display: 'inline-block', width: 6, height: 6, background: c.color, borderRadius: '50%', marginRight: 8 }} />
                  {c.level}
                </span>
              </div>
              <div style={{ padding: '12px 16px' }}>
                <span className="serif" style={{ color: 'var(--ink-dim)', fontSize: 13, lineHeight: 1.5 }}>{c.meaning}</span>
              </div>
            </div>
          ))}
        </div>
      </div>

      {/* Known Unknowns */}
      <div style={{ marginTop: 40, padding: '24px 28px', border: '1px dashed var(--line-2)', background: 'rgba(10,10,18,0.4)' }}>
        <Tick color="var(--amber)">{'\u258C'} KNOWN UNKNOWNS</Tick>
        <div style={{ display: 'flex', flexDirection: 'column', gap: 16, marginTop: 14 }}>
          <div className="serif" style={{ fontSize: 14.5, color: 'var(--ink-dim)', lineHeight: 1.65, textWrap: 'pretty' }}>
            <strong style={{ color: 'var(--ink)' }}>Pre-1962 data gaps.</strong> The NTSB database begins in 1962.
            Earlier accident records from the Civil Aeronautics Board (1938{'\u2013'}1958) and its predecessor agencies
            exist in varying formats and completeness. Temporal analyses that extend before 1962 rely on aggregated
            statistics from secondary sources (ASN, historical FAA publications) rather than individual accident records.
            Rate denominators (flight hours) are estimated before 1964.
          </div>
          <div className="serif" style={{ fontSize: 14.5, color: 'var(--ink-dim)', lineHeight: 1.65, textWrap: 'pretty' }}>
            <strong style={{ color: 'var(--ink)' }}>NLP clustering caveats.</strong> BERTopic clusters are emergent{'\u2014'}they
            reflect patterns in language, not necessarily patterns in physics. A cluster labeled "spatial disorientation"
            by the model may contain reports where disorientation was a contributing factor but not the primary cause.
            Topic labels are assigned by inspection, not by the model itself. All cluster assignments should be treated
            as candidate findings subject to domain expert review.
          </div>
          <div className="serif" style={{ fontSize: 14.5, color: 'var(--ink-dim)', lineHeight: 1.65, textWrap: 'pretty' }}>
            <strong style={{ color: 'var(--ink)' }}>CVR transcript fair use.</strong> CVR transcripts used in this analysis
            are sourced exclusively from NTSB public docket documents. They are published by the NTSB as part of the
            public investigative record. This analysis presents them verbatim as published. No audio recordings are used
            or reproduced.
          </div>
        </div>
      </div>

      {/* CVR Sourcing Note */}
      <div style={{ marginTop: 24, padding: '14px 20px', border: '1px solid var(--line)', background: 'var(--bg-2)' }}>
        <span className="serif" style={{ fontSize: 13, color: 'var(--ink-dim)', lineHeight: 1.5, fontStyle: 'italic' }}>
          All CVR transcripts are from NTSB public docket documents, presented verbatim as published.
        </span>
      </div>

      {/* Data Sources */}
      <div style={{ marginTop: 40 }}>
        <Tick color="var(--gold)">{'\u258C'} DATA SOURCES</Tick>
        <div style={{ marginTop: 14, border: '1px solid var(--line)', overflow: 'hidden' }}>
          {/* Header */}
          <div style={{
            display: 'grid', gridTemplateColumns: '1fr 160px 140px 100px',
            background: 'var(--bg-2)', borderBottom: '1px solid var(--line)',
          }}>
            {['SOURCE', 'COVERAGE', 'TYPE', 'MODELS'].map(h => (
              <div key={h} style={{ padding: '10px 14px' }}>
                <span className="mono" style={{ color: 'var(--ink-dim)', fontSize: 9, letterSpacing: 0.15, textTransform: 'uppercase' }}>{h}</span>
              </div>
            ))}
          </div>
          {/* Rows */}
          {dataSources.map((d, i) => (
            <div key={i} style={{
              display: 'grid', gridTemplateColumns: '1fr 160px 140px 100px',
              borderBottom: i < dataSources.length - 1 ? '1px solid var(--line)' : 'none',
            }}>
              <div style={{ padding: '10px 14px' }}>
                <span className="serif" style={{ color: 'var(--ink)', fontSize: 13 }}>{d.source}</span>
              </div>
              <div style={{ padding: '10px 14px' }}>
                <span className="mono" style={{ color: 'var(--ink-dim)', fontSize: 11 }}>{d.coverage}</span>
              </div>
              <div style={{ padding: '10px 14px' }}>
                <span className="mono" style={{ color: 'var(--ink-dim)', fontSize: 10, textTransform: 'uppercase' }}>{d.type}</span>
              </div>
              <div style={{ padding: '10px 14px' }}>
                <span className="mono" style={{ color: 'var(--gold)', fontSize: 11 }}>{d.use}</span>
              </div>
            </div>
          ))}
        </div>
      </div>

      {/* Citations / footer */}
      <div style={{ marginTop: 40, display: 'flex', justifyContent: 'space-between', alignItems: 'center', borderTop: '1px solid var(--line)', paddingTop: 18 }}>
        <Tick>NTSB {'\u00B7'} AVIATION ACCIDENT DATABASE {'\u00B7'} PUBLIC DOMAIN {'\u00B7'} ACCESSED 2026-05</Tick>
        <Tick color="var(--gold)">ONE HUNDRED YEARS {'\u2014'} VOL. III, ISSUE 08 {'\u2014'} FAIR USE</Tick>
      </div>
    </div>
  );
}

// Publish the component on `window` so code outside this file can look it up by name.
// NOTE(review): presumably this file is loaded as a plain (non-module) script — confirm.
Object.assign(window, { TabMethodology });
