// BareGit
#include "parser.h"
#include "macrodown.h"
#include "uni_algo/all.h"
#include <iostream>
#include <regex>

namespace macrodown
{

namespace
{

// Lookup-table entry describing one prefix markup in pre-decoded form.
// Still an aggregate, so it can be brace-initialized with {cp, macro, pattern}.
struct PrefixInfo
{
    // Code point that introduces the markup. Zero-initialized so that a
    // default-constructed entry never carries an indeterminate value.
    char32_t cp = 0;

    // Name of the macro to emit when the markup matches.
    std::string macro;

    // Compiled form of the markup's pattern string.
    std::regex pattern;
};

// Lookup-table entry describing one delimited markup in pre-decoded form.
// Still an aggregate, so it can be brace-initialized with {cp, macro, pattern}.
struct DelimInfo
{
    // Code point used as both the opening and the closing delimiter.
    // Zero-initialized so that a default-constructed entry never carries an
    // indeterminate value.
    char32_t cp = 0;

    // Name of the macro to emit when the markup matches.
    std::string macro;

    // Compiled form of the markup's pattern string.
    std::regex pattern;
};

// InlineParser handles the parsing of inline elements within a block of text.
// It scans the input string character by character (in UTF-32) and dispatches
// to specific handlers for different markup types.
class InlineParser
{
public:
    InlineParser(const std::string& input,
                 const std::vector<PrefixMarkup>& prefix_markups,
                 const std::vector<DelimitedMarkup>& delimited_markups)
        : input32_(una::utf8to32u(input)),
          prefix_markups_(prefix_markups),
          delimited_markups_(delimited_markups)
    {
        // Pre-process markup definitions into lookup tables for efficiency
        for (const auto& m : prefix_markups_)
        {
            auto cp = una::utf8to32u(m.prefix);
            if (!cp.empty())
                p_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
        }
        for (const auto& m : delimited_markups_)
        {
            auto cp = una::utf8to32u(m.delimiter);
            if (!cp.empty())
                d_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
        }
    }

    // Main parsing loop. Iterates through the input string and attempts to
    // match markup elements. If a match is found, the handler advances the
    // position. Otherwise, the current character is treated as plain text.
    std::vector<std::unique_ptr<Node>> parse()
    {
        while (pos_ < input32_.length())
        {
            if (handleEscape()) continue;
            if (handlePrefixMarkup()) continue;
            if (handleDelimitedMarkup()) continue;
            if (handleMacro()) continue;
            if (handleCode()) continue;
            if (handleLink()) continue;
            if (handleEmphasis()) continue;

            current_text_ += input32_[pos_];
            pos_++;
        }
        flushText();
        return std::move(nodes_);
    }

private:
    std::u32string input32_;
    const std::vector<PrefixMarkup>& prefix_markups_;
    const std::vector<DelimitedMarkup>& delimited_markups_;

    std::vector<PrefixInfo> p_infos_;
    std::vector<DelimInfo> d_infos_;

    size_t pos_ = 0;
    std::vector<std::unique_ptr<Node>> nodes_;
    std::u32string current_text_;

    // Pushes accumulated plain text into the node list as a Text node.
    void flushText()
    {
        if (!current_text_.empty())
        {
            nodes_.push_back(std::make_unique<Node>(Text{una::utf32to8(current_text_)} ));
            current_text_.clear();
        }
    }

    // Handles backslash escapes. Consumes the backslash and appends the
    // following character literally to the text buffer.
    bool handleEscape()
    {
        if (input32_[pos_] == '\\' && pos_ + 1 < input32_.length())
        {
            current_text_ += input32_[pos_ + 1];
            pos_ += 2;
            return true;
        }
        return false;
    }

    // Handles user-defined prefix markups (e.g., #tag).
    // Matches a specific prefix character and captures text until a boundary
    // (whitespace or punctuation, excluding underscore).
    bool handlePrefixMarkup()
    {
        for (const auto& info : p_infos_)
        {
            if (input32_[pos_] == info.cp)
            {
                size_t j = pos_ + 1;
                while (j < input32_.length())
                {
                    char32_t next_c = input32_[j];
                    if (una::codepoint::is_whitespace(next_c)) break;
                    if (next_c != '_' && next_c != '-' && next_c != '@' && next_c != '.' && una::codepoint::prop{next_c}.General_Category_P()) break;
                    j++;
                }

                if (j > pos_ + 1)
                {
                    // If the last character was a dot, and it is followed by whitespace or EOF,
                    // exclude it from the markup.
                    if (input32_[j - 1] == '.')
                    {
                        if (j == input32_.length() || una::codepoint::is_whitespace(input32_[j]))
                        {
                            j--;
                        }
                    }
                }

                if (j > pos_ + 1)
                {
                    flushText();
                    std::u32string content = input32_.substr(pos_ + 1, j - (pos_ + 1));
                    Macro macro;
                    macro.name = info.macro;
                    Group group;
                    group.addChild(std::make_unique<Node>(Text{una::utf32to8(content)}));
                    macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
                    nodes_.push_back(std::make_unique<Node>(std::move(macro)));
                    pos_ = j;
                    return true;
                }
            }
        }
        return false;
    }

    // Handles user-defined delimited markups (e.g., :highlight:).
    // Matches a delimiter character and searches for a closing delimiter.
    // Enforces strict rules: no whitespace inside, no punctuation boundaries.
    bool handleDelimitedMarkup()
    {
        for (const auto& info : d_infos_)
        {
            if (input32_[pos_] == info.cp)
            {
                size_t j = pos_ + 1;
                bool valid = true;
                bool found_end = false;
                while (j < input32_.length())
                {
                    char32_t next_c = input32_[j];
                    if (next_c == info.cp)
                    {
                        found_end = true;
                        break;
                    }
                    if (una::codepoint::is_whitespace(next_c))
                    {
                        valid = false;
                        break;
                    }
                    if (next_c != '_' && next_c != '-' && una::codepoint::prop{next_c}.General_Category_P())
                    {
                        valid = false;
                        break;
                    }
                    j++;
                }

                if (found_end && valid && j > pos_ + 1)
                {
                    flushText();
                    std::u32string content = input32_.substr(pos_ + 1, j - (pos_ + 1));
                    Macro macro;
                    macro.name = info.macro;
                    Group group;
                    group.addChild(std::make_unique<Node>(Text{una::utf32to8(content)}));
                    macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
                    nodes_.push_back(std::make_unique<Node>(std::move(macro)));
                    pos_ = j + 1;
                    return true;
                }
            }
        }
        return false;
    }

    // Handles standard macro calls (e.g., %name{arg}).
    // Parses the macro name and recursively parses its arguments enclosed in {} or [].
    bool handleMacro()
    {
        if (input32_[pos_] == '%')
        {
            flushText();
            pos_++; // skip %
            size_t name_start = pos_;
            while (pos_ < input32_.length() && (una::codepoint::is_alphanumeric(input32_[pos_]) || input32_[pos_] == '_'))
            {
                pos_++;
            }
            std::u32string name32 = input32_.substr(name_start, pos_ - name_start);
            std::string name = una::utf32to8(name32);

            if (name.empty())
            {
                current_text_ += '%';
                // Continue from loop
                return true; 
            }

            Macro macro;
            macro.name = name;

            while (pos_ < input32_.length())
            {
                char32_t open = input32_[pos_];
                if (open == '{' || open == '[')
                {
                    char32_t close = (open == '{') ? '}' : ']';
                    pos_++;

                    std::u32string arg_content32;
                    int balance = 1;
                    while (pos_ < input32_.length() && balance > 0)
                    {
                        if (input32_[pos_] == open)
                        {
                            balance++;
                            arg_content32 += input32_[pos_];
                        }
                        else if (input32_[pos_] == close)
                        {
                            balance--;
                            if (balance > 0) arg_content32 += input32_[pos_];
                        }
                        else
                        {
                            arg_content32 += input32_[pos_];
                        }
                        pos_++;
                    }

                    Group group;
                    auto sub_nodes = Parser::parse(una::utf32to8(arg_content32), prefix_markups_, delimited_markups_);
                    for (auto& n : sub_nodes)
                    {
                        group.addChild(std::move(n));
                    }
                    macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
                }
                else
                {
                    break;
                }
            }
            nodes_.push_back(std::make_unique<Node>(std::move(macro)));
            return true;
        }
        return false;
    }

    // Handles inline code blocks enclosed in backticks (`code`).
    bool handleCode()
    {
        if (input32_[pos_] == '`')
        {
            size_t start = pos_ + 1;
            size_t end = input32_.find('`', start);
            if (end != std::u32string::npos)
            {
                flushText();
                std::u32string content32 = input32_.substr(start, end - start);

                Macro macro;
                macro.name = "code";

                Group group;
                group.addChild(std::make_unique<Node>(Text{una::utf32to8(content32)}));

                macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
                nodes_.push_back(std::make_unique<Node>(std::move(macro)));
                pos_ = end + 1;
                return true;
            }
        }
        return false;
    }

    // Handles Markdown links ([text](url)).
    // Recursively parses the link text.
    bool handleLink()
    {
        if (input32_[pos_] == '[')
        {
            size_t label_start = pos_ + 1;
            size_t j = label_start;
            int bracket_bal = 1;
            while (j < input32_.length() && bracket_bal > 0)
            {
                if (input32_[j] == '[') bracket_bal++;
                else if (input32_[j] == ']') bracket_bal--;
                if (bracket_bal > 0) j++;
            }

            if (j < input32_.length() && bracket_bal == 0)
            {
                size_t close_bracket = j;
                if (close_bracket + 1 < input32_.length() && input32_[close_bracket + 1] == '(')
                {
                    size_t url_start = close_bracket + 2;
                    size_t url_end = input32_.find(')', url_start);
                    if (url_end != std::u32string::npos)
                    {
                        flushText();
                        std::u32string label32 = input32_.substr(label_start, close_bracket - label_start);
                        std::u32string url32 = input32_.substr(url_start, url_end - url_start);

                        Macro macro;
                        macro.name = "link";

                        // Arg 1: URL
                        Group group1;
                        group1.addChild(std::make_unique<Node>(Text{una::utf32to8(url32)}));
                        macro.arguments.push_back(std::make_unique<Node>(std::move(group1)));

                        // Arg 2: Text (parsed)
                        Group group2;
                        auto sub = Parser::parse(una::utf32to8(label32), prefix_markups_, delimited_markups_);
                        for (auto& n : sub) group2.addChild(std::move(n));
                        macro.arguments.push_back(std::make_unique<Node>(std::move(group2)));

                        nodes_.push_back(std::make_unique<Node>(std::move(macro)));
                        pos_ = url_end + 1;
                        return true;
                    }
                }
            }
        }
        return false;
    }

    // Handles emphasis (*em*) and strong emphasis (**strong**).
    // Recursively parses the content.
    bool handleEmphasis()
    {
        if (input32_[pos_] == '*')
        {
            bool strong = (pos_ + 1 < input32_.length() && input32_[pos_ + 1] == '*');
            size_t start_content = pos_ + (strong ? 2 : 1);

            std::u32string delim = strong ? U"**" : U"*";
            size_t end = input32_.find(delim, start_content);

            if (end != std::u32string::npos)
            {
                flushText();
                std::u32string content32 = input32_.substr(start_content, end - start_content);

                Macro macro;
                macro.name = strong ? "strong" : "em";

                Group group;
                auto sub = Parser::parse(una::utf32to8(content32), prefix_markups_, delimited_markups_);
                for (auto& n : sub) group.addChild(std::move(n));
                macro.arguments.push_back(std::make_unique<Node>(std::move(group)));

                nodes_.push_back(std::make_unique<Node>(std::move(macro)));
                pos_ = end + delim.length();
                return true;
            }
        }
        return false;
    }
};

} // namespace

// Parses `input` (UTF-8) into a list of inline nodes by running a temporary
// InlineParser configured with the given prefix and delimited markup
// definitions.
std::vector<std::unique_ptr<Node>> Parser::parse(
    const std::string& input,
    const std::vector<PrefixMarkup>& prefix_markups,
    const std::vector<DelimitedMarkup>& delimited_markups)
{
    return InlineParser(input, prefix_markups, delimited_markups).parse();
}

} // namespace macrodown