//! Table-Driven Grammar Storage
//!
//! This module defines the table structures for storing grammar instructions
//! in a flat, cache-friendly format. All tables are static arrays generated
//! at compile time from Python grammar definitions.
//!
//! # Architecture
//!
//! Instead of `Arc<Grammar>` trees with scattered heap allocations, we use
//! flat tables with indexed access. The four core tables are:
//!
//! 1. **GRAMMAR_TABLE**: Main instruction array (`&[GrammarInst]`)
//! 2. **CHILD_IDS**: Flattened child references (`&[u32]`)
//! 3. **TERMINATORS**: Flattened terminator references (`&[u32]`)
//! 4. **STRING_TABLE**: Deduplicated string storage (`&[&'static str]`)
//!
//! # Memory Layout Example
//!
//! For a simple grammar like:
//! ```text
//! Sequence(
//!     elements: [Ref("keyword"), Ref("identifier")],
//!     terminators: [Ref("semicolon")]
//! )
//! ```
//!
//! We generate:
//! ```text
//! GRAMMAR_TABLE[0] = GrammarInst {
//!     variant: Sequence,
//!     first_child_idx: 0,    // Points to CHILD_IDS[0]
//!     child_count: 2,         // 2 children
//!     first_terminator_idx: 0, // Points to TERMINATORS[0]
//!     terminator_count: 1,     // 1 terminator
//!     ...
//! }
//!
//! CHILD_IDS = [1, 2, ...]  // IDs of Ref("keyword") and Ref("identifier")
//! TERMINATORS = [3, ...]    // ID of Ref("semicolon")
//! ```

use crate::grammar_inst::{GrammarId, GrammarInst};

/// Grammar tables for a single dialect.
///
/// Bundles every static table the parser reads. The tables are generated by
/// the Python codegen and embedded as `static` data in the binary, so each
/// field is a `&'static` slice: cloning this struct copies slice references
/// only, never table contents.
#[derive(Debug, Clone)]
pub struct GrammarTables {
    /// Main instruction table (6,000+ instructions for ANSI).
    ///
    /// Indexed by grammar ID; see `get_inst` / `get_inst_raw`.
    pub instructions: &'static [GrammarInst],

    /// Flattened child IDs (20,000+ entries for ANSI).
    ///
    /// Instructions index into this table via `first_child_idx` + `child_count`.
    /// This avoids nested Vec allocations.
    pub child_ids: &'static [u32],

    /// Flattened terminator IDs (8,000+ entries for ANSI).
    ///
    /// Instructions index into this table via `first_terminator_idx` + `terminator_count`.
    pub terminators: &'static [u32],

    /// Deduplicated string storage (3,000+ strings for ANSI).
    ///
    /// Contains:
    /// - Ref names ("SelectStatementSegment", "FromClauseSegment", etc.)
    /// - String templates ("SELECT", "INSERT", etc.)
    /// - Token types ("keyword", "identifier", etc.)
    /// - Meta names ("indent", "dedent", etc.)
    pub strings: &'static [&'static str],

    /// Auxiliary data table for variant-specific needs.
    ///
    /// Contains:
    /// - max_times values (for AnyNumberOf/AnySetOf)
    /// - max_times_per_element values
    /// - delimiter indices (for Delimited)
    /// - bracket pair indices (for Bracketed)
    /// - exclude grammar indices (for Ref/OneOf/AnyNumberOf/AnySetOf)
    /// - simple_hint indices (optional)
    ///
    /// Each grammar's slice of this table starts at the offset recorded for
    /// it in `aux_data_offsets`.
    pub aux_data: &'static [u32],

    /// Auxiliary data offsets (indexed by GrammarId).
    ///
    /// Maps each GrammarId to its starting offset in the `aux_data` table.
    /// This allows variants to store variable-length aux data without
    /// increasing GrammarInst size.
    pub aux_data_offsets: &'static [u32],

    /// Regex pattern storage for RegexParser.
    ///
    /// Contains serialized regex patterns. Indexed via `aux_data`.
    /// Stored separately to avoid mixing with numeric indices.
    pub regex_patterns: &'static [&'static str],

    /// Simple hint storage (optional optimization).
    ///
    /// Contains pre-computed simple hints for fast pruning of OneOf alternatives.
    /// Each `SimpleHintData` entry references spans in `hint_string_indices`.
    pub simple_hints: &'static [SimpleHintData],

    /// Indices into the `strings` table for hint values.
    ///
    /// `SimpleHintData` stores start/count pairs into this array.
    /// Each entry is an index into the `strings` table.
    pub hint_string_indices: &'static [u32],

    /// Per-instruction simple hint index (indexed by GrammarId).
    ///
    /// Each value is an index into `simple_hints`. The value 0 stands for
    /// "no hint available"; per the notes on `get_simple_hint_for_grammar`,
    /// slot 0 of `simple_hints` is reserved for the empty hint, so 0 is still
    /// looked up like any other index.
    pub simple_hint_indices: &'static [u32],

    /// Per-instruction segment type offsets into the `strings` table.
    ///
    /// The value `0xFFFFFFFF` marks an instruction without a segment type.
    pub segment_type_offsets: &'static [u32],

    /// Per-instruction segment class name offsets into the `strings` table
    /// (or `0xFFFFFFFF`).
    pub segment_class_offsets: &'static [u32],

    /// Sparse casefold entries: `(grammar_id, mode)`.
    ///
    /// Sorted by grammar_id for binary search. mode: 0=None, 1=Upper, 2=Lower.
    pub casefold_sparse: &'static [(u32, u8)],

    /// Sparse trim_chars entries: `(grammar_id, offset_into_trim_chars_data, count)`.
    ///
    /// Sorted by grammar_id for binary search. Empty if no grammars use trim_chars.
    pub trim_chars_sparse: &'static [(u32, u32, u8)],

    /// Flat array of string indices for trim_chars values.
    pub trim_chars_data: &'static [u32],
}

impl GrammarTables {
    /// Create new grammar tables (for codegen use)
    pub const fn new(
        instructions: &'static [GrammarInst],
        child_ids: &'static [u32],
        terminators: &'static [u32],
        strings: &'static [&'static str],
        aux_data: &'static [u32],
        aux_data_offsets: &'static [u32],
        regex_patterns: &'static [&'static str],
        simple_hints: &'static [SimpleHintData],
        hint_string_indices: &'static [u32],
        simple_hint_indices: &'static [u32],
        segment_type_offsets: &'static [u32],
        segment_class_offsets: &'static [u32],
        casefold_sparse: &'static [(u32, u8)],
        trim_chars_sparse: &'static [(u32, u32, u8)],
        trim_chars_data: &'static [u32],
    ) -> Self {
        Self {
            instructions,
            child_ids,
            terminators,
            strings,
            aux_data,
            aux_data_offsets,
            regex_patterns,
            simple_hints,
            hint_string_indices,
            simple_hint_indices,
            segment_type_offsets,
            segment_class_offsets,
            casefold_sparse,
            trim_chars_sparse,
            trim_chars_data,
        }
    }

    /// Get instruction by ID
    #[inline]
    pub fn get_inst(&self, id: GrammarId) -> &GrammarInst {
        &self.instructions[id.get() as usize]
    }

    /// Get instruction by raw ID
    #[inline]
    pub fn get_inst_raw(&self, id: u32) -> &GrammarInst {
        &self.instructions[id as usize]
    }

    /// Get string by index
    #[inline]
    pub fn get_string(&self, idx: u32) -> &'static str {
        self.strings[idx as usize]
    }

    /// Get children IDs for an instruction
    #[inline]
    pub fn get_children(&self, inst: &GrammarInst) -> &[u32] {
        inst.children(self.child_ids)
    }

    /// Get terminator IDs for an instruction
    #[inline]
    pub fn get_terminators(&self, inst: &GrammarInst) -> &[u32] {
        inst.terminators(self.terminators)
    }

    /// Get aux data value by index
    #[inline]
    pub fn get_aux(&self, idx: u32) -> u32 {
        self.aux_data[idx as usize]
    }

    /// Get regex pattern by index
    #[inline]
    pub fn get_regex(&self, idx: u32) -> &'static str {
        self.regex_patterns[idx as usize]
    }

    /// Get segment type string by instruction id, if present
    #[inline]
    pub fn get_segment_type(&self, id: u32) -> Option<&'static str> {
        if id as usize >= self.segment_type_offsets.len() {
            return None;
        }
        let off = self.segment_type_offsets[id as usize];
        if off == 0xFFFFFFFF {
            None
        } else {
            Some(self.strings[off as usize])
        }
    }

    /// Get simple hint by index
    #[inline]
    pub fn get_simple_hint(&self, idx: u32) -> &SimpleHintData {
        &self.simple_hints[idx as usize]
    }

    /// Get simple hint for a grammar by its ID
    ///
    /// Returns None if the grammar has no hint (hint_idx = 0).
    /// Note: hint_idx=0 actually maps to SIMPLE_HINTS[0] which is empty().
    #[inline]
    pub fn get_simple_hint_for_grammar(&self, id: GrammarId) -> Option<&SimpleHintData> {
        if id.get() as usize >= self.simple_hint_indices.len() {
            return None;
        }
        let hint_idx = self.simple_hint_indices[id.get() as usize] as usize;
        if hint_idx >= self.simple_hints.len() {
            // Out of bounds - this shouldn't happen but handle gracefully
            None
        } else {
            // hint_idx is a direct index into simple_hints array
            // SIMPLE_HINTS[0] is reserved for empty hint
            Some(&self.simple_hints[hint_idx])
        }
    }

    /// Check if a hint can match a token
    ///
    /// Returns true if:
    /// - The hint is empty (complex grammar, must try)
    /// - The token's raw_upper matches any of the hint's raw values
    /// - The token's types intersect with the hint's token types
    #[inline]
    pub fn hint_can_match(
        &self,
        hint: &SimpleHintData,
        raw_upper: &str,
        token_types: &hashbrown::HashSet<String>,
    ) -> bool {
        // Empty hint means "complex - can't determine", so return true (must try it)
        if hint.is_empty() {
            return true;
        }

        // Check raw values (indices into hint_string_indices -> strings)
        let raw_start = hint.raw_values_start as usize;
        let raw_end = raw_start + hint.raw_values_count as usize;
        for i in raw_start..raw_end {
            let str_idx = self.hint_string_indices[i] as usize;
            if self.strings[str_idx] == raw_upper {
                return true;
            }
        }

        // Check token types intersection
        let types_start = hint.token_types_start as usize;
        let types_end = types_start + hint.token_types_count as usize;
        for i in types_start..types_end {
            let str_idx = self.hint_string_indices[i] as usize;
            if token_types.contains(self.strings[str_idx]) {
                return true;
            }
        }

        false
    }

    /// Get memory usage statistics
    pub fn memory_stats(&self) -> TableMemoryStats {
        use std::mem::size_of;

        let instructions_bytes = self.instructions.len() * size_of::<GrammarInst>();
        let child_ids_bytes = self.child_ids.len() * size_of::<u32>();
        let terminators_bytes = self.terminators.len() * size_of::<u32>();
        let aux_data_bytes = self.aux_data.len() * size_of::<u32>();

        // String data: sum of string lengths + pointer overhead
        let strings_bytes = self
            .strings
            .iter()
            .map(|s| s.len() + size_of::<&str>())
            .sum();

        let regex_bytes = self
            .regex_patterns
            .iter()
            .map(|s| s.len() + size_of::<&str>())
            .sum();

        let simple_hints_bytes = self.simple_hints.len() * size_of::<SimpleHintData>();

        TableMemoryStats {
            instructions: (self.instructions.len(), instructions_bytes),
            child_ids: (self.child_ids.len(), child_ids_bytes),
            terminators: (self.terminators.len(), terminators_bytes),
            strings: (self.strings.len(), strings_bytes),
            aux_data: (self.aux_data.len(), aux_data_bytes),
            regex_patterns: (self.regex_patterns.len(), regex_bytes),
            simple_hints: (self.simple_hints.len(), simple_hints_bytes),
        }
    }
}

/// Memory usage statistics for grammar tables.
///
/// Every field is a `(count, bytes)` pair: the number of entries in the table
/// and the number of bytes attributed to it.
#[derive(Debug, Clone)]
pub struct TableMemoryStats {
    /// (count, bytes) for the instruction table
    pub instructions: (usize, usize),
    /// (count, bytes) for the flattened child IDs
    pub child_ids: (usize, usize),
    /// (count, bytes) for the flattened terminator IDs
    pub terminators: (usize, usize),
    /// (count, bytes) for the string table
    pub strings: (usize, usize),
    /// (count, bytes) for the aux data table
    pub aux_data: (usize, usize),
    /// (count, bytes) for the regex pattern table
    pub regex_patterns: (usize, usize),
    /// (count, bytes) for the simple hint table
    pub simple_hints: (usize, usize),
}

impl TableMemoryStats {
    /// Total memory usage in bytes: the sum of the byte component of every field.
    pub fn total_bytes(&self) -> usize {
        [
            self.instructions,
            self.child_ids,
            self.terminators,
            self.strings,
            self.aux_data,
            self.regex_patterns,
            self.simple_hints,
        ]
        .iter()
        .map(|&(_, bytes)| bytes)
        .sum()
    }

    /// Render the statistics as a multi-line, human-readable report.
    ///
    /// The first line is the grand total in MB (bytes / 1,048,576); each
    /// following line shows one table's entry count and its size in KB
    /// (bytes / 1024). The literal `4` in the report is the size of a `u32`
    /// element.
    pub fn format(&self) -> String {
        // Convert a byte count to kilobytes for display.
        let kb = |bytes: usize| bytes as f64 / 1024.0;

        format!(
            "Total: {total_mb:.2} MB\n\
             - Instructions: {inst_n} × {inst_size} = {inst_kb:.2} KB\n\
             - Child IDs: {child_n} × 4 = {child_kb:.2} KB\n\
             - Terminators: {term_n} × 4 = {term_kb:.2} KB\n\
             - Strings: {str_n} entries = {str_kb:.2} KB\n\
             - Aux data: {aux_n} × 4 = {aux_kb:.2} KB\n\
             - Regex patterns: {regex_n} entries = {regex_kb:.2} KB\n\
             - Simple hints: {hint_n} × {hint_size} = {hint_kb:.2} KB",
            total_mb = self.total_bytes() as f64 / 1_048_576.0,
            inst_n = self.instructions.0,
            inst_size = std::mem::size_of::<GrammarInst>(),
            inst_kb = kb(self.instructions.1),
            child_n = self.child_ids.0,
            child_kb = kb(self.child_ids.1),
            term_n = self.terminators.0,
            term_kb = kb(self.terminators.1),
            str_n = self.strings.0,
            str_kb = kb(self.strings.1),
            aux_n = self.aux_data.0,
            aux_kb = kb(self.aux_data.1),
            regex_n = self.regex_patterns.0,
            regex_kb = kb(self.regex_patterns.1),
            hint_n = self.simple_hints.0,
            hint_size = std::mem::size_of::<SimpleHintData>(),
            hint_kb = kb(self.simple_hints.1),
        )
    }
}

/// Simple hint data (pre-computed for fast pruning).
///
/// Compact, table-friendly form of a simple hint. Rather than owning sets of
/// strings, a hint records two `(start, count)` spans into the shared hint
/// index table (`GrammarTables::hint_string_indices`); each entry in a span is
/// in turn an index into the string table.
#[derive(Debug, Clone)]
pub struct SimpleHintData {
    /// First slot of the raw-value span in the shared hint index table
    pub raw_values_start: u32,
    /// Number of raw values in the span
    pub raw_values_count: u16,
    /// First slot of the token-type span in the shared hint index table
    pub token_types_start: u32,
    /// Number of token types in the span
    pub token_types_count: u16,
}

impl SimpleHintData {
    /// Build the empty hint: both spans start at 0 and contain nothing.
    pub const fn empty() -> Self {
        SimpleHintData {
            raw_values_start: 0,
            token_types_start: 0,
            raw_values_count: 0,
            token_types_count: 0,
        }
    }

    /// Whether this hint is empty (complex grammar).
    ///
    /// Only the two counts are inspected; the start offsets are irrelevant.
    pub const fn is_empty(&self) -> bool {
        matches!((self.raw_values_count, self.token_types_count), (0, 0))
    }
}

/// Iterator over the children of an instruction.
///
/// Walks the span `child_ids[current..end]` and yields each raw `u32` wrapped
/// in a `GrammarId`, giving ergonomic access to child grammar IDs.
pub struct ChildrenIter<'a> {
    /// Flattened child ID table the span refers to
    child_ids: &'a [u32],
    /// Index of the next entry to yield
    current: usize,
    /// One past the last entry of the span
    end: usize,
}

impl<'a> ChildrenIter<'a> {
    /// Create an iterator over the children of `inst`.
    ///
    /// The span starts at `inst.first_child_idx` and covers `inst.child_count`
    /// entries of `child_ids`. The span is not validated here; an entry that
    /// lies outside `child_ids` causes a panic when it is reached in `next`.
    pub fn new(inst: &GrammarInst, child_ids: &'a [u32]) -> Self {
        let current = inst.first_child_idx as usize;
        ChildrenIter {
            child_ids,
            current,
            end: current + inst.child_count as usize,
        }
    }
}

impl Iterator for ChildrenIter<'_> {
    type Item = GrammarId;

    /// Yield the next child ID, or `None` once the span is exhausted.
    fn next(&mut self) -> Option<GrammarId> {
        if self.current >= self.end {
            return None;
        }
        let id = GrammarId::new(self.child_ids[self.current]);
        self.current += 1;
        Some(id)
    }

    /// Exact number of IDs still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.end - self.current;
        (left, Some(left))
    }
}

impl ExactSizeIterator for ChildrenIter<'_> {
    /// Number of IDs still to be yielded (same value `size_hint` reports).
    fn len(&self) -> usize {
        self.size_hint().0
    }
}

/// Iterator over the terminators of an instruction.
///
/// Walks the span `terminators[current..end]` and yields each raw `u32`
/// wrapped in a `GrammarId`.
pub struct TerminatorsIter<'a> {
    /// Flattened terminator ID table the span refers to
    terminators: &'a [u32],
    /// Index of the next entry to yield
    current: usize,
    /// One past the last entry of the span
    end: usize,
}

impl<'a> TerminatorsIter<'a> {
    /// Create an iterator over the terminators of `inst`.
    ///
    /// The span starts at `inst.first_terminator_idx` and covers
    /// `inst.terminator_count` entries of `terminators`. The span is not
    /// validated here; an entry that lies outside `terminators` causes a panic
    /// when it is reached in `next`.
    pub fn new(inst: &GrammarInst, terminators: &'a [u32]) -> Self {
        let current = inst.first_terminator_idx as usize;
        TerminatorsIter {
            terminators,
            current,
            end: current + inst.terminator_count as usize,
        }
    }
}

impl Iterator for TerminatorsIter<'_> {
    type Item = GrammarId;

    /// Yield the next terminator ID, or `None` once the span is exhausted.
    fn next(&mut self) -> Option<GrammarId> {
        if self.current >= self.end {
            return None;
        }
        let id = GrammarId::new(self.terminators[self.current]);
        self.current += 1;
        Some(id)
    }

    /// Exact number of IDs still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let left = self.end - self.current;
        (left, Some(left))
    }
}

impl ExactSizeIterator for TerminatorsIter<'_> {
    /// Number of IDs still to be yielded (same value `size_hint` reports).
    fn len(&self) -> usize {
        self.size_hint().0
    }
}

/// Extension trait for `GrammarInst` providing iterator and aux-data helpers.
///
/// The iterator methods take the flattened ID table explicitly; the aux-data
/// methods take the full `GrammarTables` plus the instruction's own
/// `GrammarId`, because the offset lookup is keyed by ID.
pub trait GrammarInstExt {
    /// Iterate over this instruction's children as `GrammarId`s.
    ///
    /// `child_ids` is the flattened child table the instruction indexes into.
    fn children_iter<'a>(&self, child_ids: &'a [u32]) -> ChildrenIter<'a>;

    /// Iterate over this instruction's terminators as `GrammarId`s.
    ///
    /// `terminators` is the flattened terminator table the instruction
    /// indexes into.
    fn terminators_iter<'a>(&self, terminators: &'a [u32]) -> TerminatorsIter<'a>;

    /// Get the aux_data start offset for this instruction using the
    /// grammar tables' `aux_data_offsets` mapping.
    ///
    /// `id` must be the ID of this instruction. The returned value is an
    /// index into `tables.aux_data`. The implementation for `GrammarInst` in
    /// this module indexes `aux_data_offsets` directly and therefore panics
    /// if `id` is out of range.
    fn aux_offset_for(&self, tables: &GrammarTables, id: GrammarId) -> usize;

    /// Get an aux_data value for this instruction by relative index.
    ///
    /// `relative_idx` is a 0-based offset from the instruction's aux offset
    /// (see `aux_offset_for`). Returns the `u32` stored in `tables.aux_data`
    /// at that location. The implementation for `GrammarInst` in this module
    /// panics if the resulting index is out of range.
    fn aux_at(&self, tables: &GrammarTables, id: GrammarId, relative_idx: usize) -> u32;
}

impl GrammarInstExt for GrammarInst {
    /// Build a `ChildrenIter` over this instruction's span of `child_ids`.
    fn children_iter<'a>(&self, child_ids: &'a [u32]) -> ChildrenIter<'a> {
        let inst = self;
        ChildrenIter::new(inst, child_ids)
    }

    /// Build a `TerminatorsIter` over this instruction's span of `terminators`.
    fn terminators_iter<'a>(&self, terminators: &'a [u32]) -> TerminatorsIter<'a> {
        let inst = self;
        TerminatorsIter::new(inst, terminators)
    }

    /// Look up where this instruction's aux data begins.
    ///
    /// Reads `tables.aux_data_offsets` at position `id`; panics if `id` is out
    /// of range for that table.
    fn aux_offset_for(&self, tables: &GrammarTables, id: GrammarId) -> usize {
        let offset = tables.aux_data_offsets[id.get() as usize];
        offset as usize
    }

    /// Read the aux value `relative_idx` entries past this instruction's offset.
    ///
    /// Panics if the resulting absolute index is out of range for
    /// `tables.aux_data`.
    fn aux_at(&self, tables: &GrammarTables, id: GrammarId, relative_idx: usize) -> u32 {
        let base = self.aux_offset_for(tables, id);
        let absolute = base + relative_idx;
        tables.aux_data[absolute]
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::grammar_inst::GrammarVariant;

    /// Assemble a `GrammarTables` fixture from the tables the tests care about.
    ///
    /// `per_inst_zeros` holds one `0` per instruction and is used for both
    /// `aux_data_offsets` and `simple_hint_indices`. Every other table is empty.
    fn mock_tables(
        instructions: &'static [GrammarInst],
        child_ids: &'static [u32],
        terminators: &'static [u32],
        strings: &'static [&'static str],
        per_inst_zeros: &'static [u32],
    ) -> GrammarTables {
        GrammarTables::new(
            instructions,
            child_ids,
            terminators,
            strings,
            &[],            // aux_data
            per_inst_zeros, // aux_data_offsets
            &[],            // regex_patterns
            &[],            // simple_hints
            &[],            // hint_string_indices
            per_inst_zeros, // simple_hint_indices
            &[],            // segment_type_offsets
            &[],            // segment_class_offsets
            &[],            // casefold_sparse (no casefold in these tests)
            &[],            // trim_chars_sparse (no trim_chars in these tests)
            &[],            // trim_chars_data
        )
    }

    /// Basic accessors work against a three-instruction mock table set.
    #[test]
    fn test_grammar_tables_basic() {
        // Kept as a `static` because the entries are built by `const fn` calls.
        static INSTRUCTIONS: &[GrammarInst] = &[
            GrammarInst::new(GrammarVariant::Sequence).with_children(0, 2),
            GrammarInst::new(GrammarVariant::Ref),
            GrammarInst::new(GrammarVariant::Token),
        ];

        let tables = mock_tables(
            INSTRUCTIONS,
            &[1, 2],                // child_ids
            &[],                    // terminators
            &["SELECT", "keyword"], // strings
            &[0, 0, 0],             // one per instruction
        );

        assert_eq!(tables.instructions.len(), 3);
        assert_eq!(tables.get_string(0), "SELECT");
        assert_eq!(tables.get_children(&INSTRUCTIONS[0]), &[1, 2]);
    }

    /// `children_iter` yields exactly the instruction's span of the child table.
    #[test]
    fn test_children_iterator() {
        let child_ids = [0, 1, 10, 11, 12, 20, 21];
        let inst = GrammarInst::new(GrammarVariant::Sequence).with_children(2, 3);

        let yielded: Vec<_> = inst
            .children_iter(&child_ids)
            .map(|id| id.get())
            .collect();

        assert_eq!(yielded, [10, 11, 12]);
    }

    /// `terminators_iter` yields exactly the instruction's span of the terminator table.
    #[test]
    fn test_terminators_iterator() {
        let terminators = [0, 100, 101, 200];
        let inst = GrammarInst::new(GrammarVariant::OneOf).with_terminators(1, 2);

        let yielded: Vec<_> = inst
            .terminators_iter(&terminators)
            .map(|id| id.get())
            .collect();

        assert_eq!(yielded, [100, 101]);
    }

    /// `memory_stats` reports entry counts and a non-zero total.
    #[test]
    fn test_memory_stats() {
        // Kept as a `static` because the entries are built by `const fn` calls.
        static INSTRUCTIONS: &[GrammarInst] = &[
            GrammarInst::new(GrammarVariant::Sequence),
            GrammarInst::new(GrammarVariant::Ref),
        ];

        let tables = mock_tables(
            INSTRUCTIONS,
            &[1, 2, 3],          // child_ids
            &[10],               // terminators
            &["SELECT", "FROM"], // strings
            &[0, 0],             // one per instruction
        );
        let stats = tables.memory_stats();

        let (inst_count, inst_bytes) = stats.instructions;
        assert_eq!(inst_count, 2);
        assert_eq!(inst_bytes, 2 * std::mem::size_of::<GrammarInst>());
        assert_eq!(stats.child_ids.0, 3);
        assert_eq!(stats.terminators.0, 1);
        assert_eq!(stats.strings.0, 2);

        // Should be greater than zero
        assert!(stats.total_bytes() > 0);
    }

    /// A hint is empty only when both of its counts are zero.
    #[test]
    fn test_simple_hint_data() {
        assert!(SimpleHintData::empty().is_empty());

        let populated = SimpleHintData {
            raw_values_start: 0,
            raw_values_count: 2,
            token_types_start: 10,
            token_types_count: 1,
        };
        assert!(!populated.is_empty());
    }
}
