BareGit
#include "block_parser.h"

#include <algorithm>
#include <cctype>
#include <iostream>
#include <sstream>

namespace macrodown
{

namespace
{

// Helper: true when every character of `line` is whitespace as classified by
// std::isspace (space, tab, etc.). An empty line counts as blank.
bool is_blank(const std::string& line)
{
    for(const unsigned char ch : line)
    {
        if(!std::isspace(ch))
        {
            return false;
        }
    }
    return true;
}

// Helper: number of consecutive ' ' characters in `line` starting at `offset`.
// Only the space character is counted; returns 0 when `offset` is at or past
// the end of the line.
size_t count_indent(const std::string& line, size_t offset)
{
    size_t pos = offset;
    for(; pos < line.size() && line[pos] == ' '; ++pos)
    {
        // advance over leading spaces
    }
    return pos - offset;
}

} // namespace

// Builds a parser whose tree consists of a single Document block; that block
// is also the first (bottom) entry of the open-block stack.
BlockParser::BlockParser()
    : root(std::make_unique<Block>(BlockType::Document))
{
    open_blocks.push_back({root.get()});
}

// Parses `input` line by line with a fresh BlockParser and returns ownership
// of the resulting Document block.
std::unique_ptr<Block> BlockParser::parse(const std::string& input)
{
    BlockParser parser;
    std::istringstream lines(input);

    for(std::string current; std::getline(lines, current); )
    {
        // Drop a trailing '\r' so Windows (CRLF) line endings are handled.
        const bool has_cr = !current.empty() && current.back() == '\r';
        if(has_cr)
        {
            current.pop_back();
        }
        parser.process_line(current);
    }

    // End of input: pop every block above the root off the open stack (each
    // one is marked closed as it is popped). The root stays on the stack, so
    // it is marked closed explicitly.
    parser.close_unmatched_blocks(0);
    parser.root->open = false;

    // `root` is a member of the local parser, not a local itself, so the move
    // must be spelled out.
    return std::move(parser.root);
}

// Returns true for the block types treated as containers here:
// Document, Quote, List and ListItem. Every other type yields false.
bool BlockParser::is_container(BlockType type)
{
    switch(type)
    {
        case BlockType::Document:
        case BlockType::Quote:
        case BlockType::List:
        case BlockType::ListItem:
            return true;
        default:
            return false;
    }
}

// Decides whether the already-open `block` continues on `line`.
//
// `offset` is the position in `line` where scanning starts. It is advanced
// only when a Quote matches (past the indent, the '>' and one optional space);
// in every other case it is left untouched.
//
//  - Document: always matches.
//  - Quote: matches when fewer than 4 spaces of indent are followed by '>'.
//  - Paragraph: matches unless the whole line is blank, or the line (with
//    fewer than 4 spaces of indent) starts a block quote or an ATX heading.
//  - Any other type: never matches.
bool BlockParser::matches(Block* block, const std::string& line, size_t& offset)
{
    switch(block->type)
    {
        case BlockType::Document:
            return true;

        case BlockType::Quote:
        {
            const size_t lead = count_indent(line, offset);
            const size_t marker = offset + lead;
            if(lead >= 4 || marker >= line.size() || line[marker] != '>')
            {
                return false;
            }

            // Consume the indent and the '>' marker, then one optional space.
            offset = marker + 1;
            if(offset < line.size() && line[offset] == ' ')
            {
                ++offset;
            }
            return true;
        }

        case BlockType::Paragraph:
        {
            if(is_blank(line))
            {
                return false;
            }

            const size_t lead = count_indent(line, offset);
            if(lead >= 4)
            {
                // Indented by 4 or more spaces: cannot start a new block here.
                return true;
            }

            const size_t start = offset + lead;
            if(start >= line.size())
            {
                return true;
            }

            // A block quote marker interrupts the paragraph.
            if(line[start] == '>')
            {
                return false;
            }

            // An ATX heading interrupts the paragraph: 1 to 6 '#' characters
            // followed by a space or the end of the line.
            size_t end = start;
            while(end < line.size() && line[end] == '#' && end - start < 6)
            {
                ++end;
            }
            const bool has_hashes = end > start;
            const bool terminated = end == line.size() || line[end] == ' ';
            if(has_hashes && terminated)
            {
                return false;
            }

            // Otherwise the line continues the paragraph.
            return true;
        }

        default:
            return false;
    }
}

void BlockParser::process_line(const std::string& line)
{
    size_t offset = 0;
    size_t matches_count = 0;
    
    // 1. Find matches in open blocks
    matches_count = 0; // Root always matches
    for(size_t i = 1; i < open_blocks.size(); ++i)
    {
        if(matches(open_blocks[i].block, line, offset))
        {
            matches_count = i;
        }
        else
        {
            break;
        }
    }

    // 2. Close unmatched blocks
    close_unmatched_blocks(matches_count);
    
    // 3. Open new blocks
    // Scan rest of line (at offset)
    
    // Check for BlockQuote
    while(true)
    {
        size_t indent = count_indent(line, offset);
        if(indent < 4 && offset + indent < line.size() && line[offset + indent] == '>')
        {
            offset += indent + 1;
            if(offset < line.size() && line[offset] == ' ') offset++;
            
            auto new_block = std::make_unique<Block>(BlockType::Quote);
            Block* ptr = new_block.get();
            open_blocks.back().block->children.push_back(std::move(new_block));
            open_blocks.push_back({ptr});
        }
        else
        {
            break;
        }
    }
    
    // 4. Handle Leaf Blocks (Heading, ThematicBreak) or continuation
    
    Block* tip = open_blocks.back().block;
    
    // If tip is a Paragraph, check for blank line (closes it)
    if(tip->type == BlockType::Paragraph)
    {
        if(is_blank(line))
        {
            close_unmatched_blocks(open_blocks.size() - 2); // Close paragraph
            return;
        }
        // Else, it's a continuation
        // (Unless it's interrupted by a Heading/Quote etc. - Simplified: we assume it continues)
        // Strictly, we should check if the line *starts* a new block (like Header)
        // If it does, we close the paragraph.
    }
    
    // Check for ATX Heading
    size_t indent = count_indent(line, offset);
    if(indent < 4)
    {
        size_t check_pos = offset + indent;
        size_t hash_count = 0;
        while(check_pos + hash_count < line.size() && line[check_pos + hash_count] == '#' && hash_count < 6)
        {
            hash_count++;
        }
        
        if(hash_count > 0 && (check_pos + hash_count == line.size() || line[check_pos + hash_count] == ' '))
        {
            // Found Heading
            // If we were in a paragraph, close it
            if(tip->type == BlockType::Paragraph)
            {
                close_unmatched_blocks(open_blocks.size() - 2);
                tip = open_blocks.back().block;
            }
            
            auto heading = std::make_unique<Block>(BlockType::Heading);
            heading->level = hash_count;
            // Content is the rest of the line (trimmed)
            size_t content_start = check_pos + hash_count;
            while(content_start < line.size() && line[content_start] == ' ') content_start++;
            heading->literal_content = line.substr(content_start);
            // Remove trailing hashes? CommonMark says yes. Optional for now.
            heading->open = false; // Headings are single line
            
            tip->children.push_back(std::move(heading));
            return;
        }
    }
    
    // 5. Finalize: Text or Paragraph
    if(is_blank(line))
    {
        return; // Ignore blank lines if not ending a paragraph
    }
    
    if(tip->type == BlockType::Document || tip->type == BlockType::Quote || tip->type == BlockType::List || tip->type == BlockType::ListItem)
    {
        // Create new Paragraph
        auto p = std::make_unique<Block>(BlockType::Paragraph);
        Block* p_ptr = p.get();
        tip->children.push_back(std::move(p));
        open_blocks.push_back({p_ptr});
        
        // Add text
        // Note: indentation in paragraph text is preserved but leading spaces of the first line?
        // CommonMark: stripped.
        size_t content_start = offset + count_indent(line, offset);
        p_ptr->literal_content = line.substr(content_start);
    }
    else if(tip->type == BlockType::Paragraph)
    {
        // Continuation
        // Remove leading spaces up to indent? simplified: just add space + text
        size_t content_start = offset + count_indent(line, offset);
        tip->literal_content += "\n" + line.substr(content_start);
    }
}

// Pops blocks off the open-block stack until only the entries at indices
// 0..last_matched_index remain. Each popped block is marked as no longer open.
void BlockParser::close_unmatched_blocks(size_t last_matched_index)
{
    const size_t keep = last_matched_index + 1;
    while(open_blocks.size() > keep)
    {
        open_blocks.back().block->open = false;
        open_blocks.pop_back();
    }
}

} // namespace macrodown