BareGit

Implement Macro Engine and simplified Parser with unit tests

Author: MetroWind <chris.corsair@gmail.com>
Date: Sat Jan 10 12:35:55 2026 -0800
Commit: c35f687b64dfc7712f858dbf269c56cbd63d098d

Changes

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32a844d..a0194da 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,7 +37,7 @@ include_directories(include)
 # but for now we will just list them manually or add a library target.
 # Let's create a core library for the logic to share between main and tests.
 
-add_library(macrodown_lib STATIC src/lib_placeholder.cpp)
+add_library(macrodown_lib STATIC src/lib_placeholder.cpp src/macro_engine.cpp src/parser.cpp)
 target_include_directories(macrodown_lib PUBLIC include)
 target_link_libraries(macrodown_lib PUBLIC uni-algo::uni-algo)
 
@@ -47,7 +47,7 @@ target_link_libraries(macrodown PRIVATE macrodown_lib)
 # Testing
 enable_testing()
 
-add_executable(macrodown_test tests/test_main.cpp)
+add_executable(macrodown_test tests/test_main.cpp tests/test_macro_engine.cpp)
 target_link_libraries(macrodown_test PRIVATE macrodown_lib GTest::gtest_main)
 target_include_directories(macrodown_test PRIVATE include)
 
diff --git a/include/nodes.h b/include/nodes.h
index 24c4279..0477dde 100644
--- a/include/nodes.h
+++ b/include/nodes.h
@@ -9,7 +9,8 @@ namespace macrodown {
 
 enum class NodeType {
     Text,
-    Macro
+    Macro,
+    Group
 };
 
 // Abstract Base Class
@@ -26,6 +27,17 @@ struct TextNode : public Node {
     NodeType type() const override { return NodeType::Text; }
 };
 
+// An ordered sequence of child nodes; the parser wraps each macro argument
+// in one of these.
+struct GroupNode : public Node {
+    std::vector<std::unique_ptr<Node>> children;
+
+    // Takes ownership of `node` and appends it as the last child.
+    void addChild(std::unique_ptr<Node> node) {
+        children.emplace_back(std::move(node));
+    }
+
+    NodeType type() const override { return NodeType::Group; }
+};
+
 // Represents a macro call: %name{arg1}{arg2}...
 struct MacroNode : public Node {
     std::string name;
diff --git a/include/parser.h b/include/parser.h
new file mode 100644
index 0000000..e322a58
--- /dev/null
+++ b/include/parser.h
@@ -0,0 +1,20 @@
+#ifndef MACRODOWN_PARSER_H
+#define MACRODOWN_PARSER_H
+
+#include <string>
+#include <vector>
+#include <memory>
+#include "nodes.h"
+
+namespace macrodown {
+
+// Inline parser. The class declares a single static function and no data
+// members, so it is used without creating an instance.
+class Parser {
+public:
+    // Parses a string into a list of nodes (Text and Macros)
+    // This handles the Inline Parsing phase (Phase 2 of the design)
+    // The caller owns the returned nodes.
+    static std::vector<std::unique_ptr<Node>> parse(const std::string& input);
+};
+
+} // namespace macrodown
+
+#endif // MACRODOWN_PARSER_H
diff --git a/src/macro_engine.cpp b/src/macro_engine.cpp
new file mode 100644
index 0000000..14d2816
--- /dev/null
+++ b/src/macro_engine.cpp
@@ -0,0 +1,150 @@
+#include "macro_engine.h"
+#include "parser.h"
+#include <cctype>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+
+namespace macrodown {
+
+namespace {
+
+// Splits `s` on `delimiter`, strips leading and trailing spaces/tabs from each
+// piece, and drops pieces that are empty or consist only of spaces/tabs.
+std::vector<std::string> split(const std::string& s, char delimiter) {
+    static const char* const kBlanks = " \t";
+    std::vector<std::string> pieces;
+    std::istringstream stream(s);
+    for (std::string piece; std::getline(stream, piece, delimiter);) {
+        const size_t begin = piece.find_first_not_of(kBlanks);
+        if (begin != std::string::npos) {
+            // A non-blank character exists, so the reverse search cannot fail.
+            const size_t end = piece.find_last_not_of(kBlanks);
+            pieces.push_back(piece.substr(begin, end - begin + 1));
+        }
+    }
+    return pieces;
+}
+
+// Returns a copy of `str` in which every non-overlapping occurrence of `from`
+// is replaced by `to`, scanning left to right. Inserted text is skipped over,
+// so `to` may itself contain `from` without being expanded again.
+// An empty `from` matches at every position and the scan would never finish,
+// so in that case the input is returned unchanged.
+std::string replace_all(std::string str, const std::string& from, const std::string& to) {
+    if (from.empty()) {
+        return str;
+    }
+    size_t pos = 0;
+    while ((pos = str.find(from, pos)) != std::string::npos) {
+        str.replace(pos, from.length(), to);
+        pos += to.length(); // resume right after the inserted text
+    }
+    return str;
+}
+
+} // namespace
+
+// Constructs an evaluator with the intrinsic %def macro registered.
+//
+// Usage: %def[name]{arg1, arg2, ...}{body}
+// The callback receives the argument strings produced by the evaluator:
+//   args[0] - name of the macro to define
+//   args[1] - comma-separated list of parameter names
+//   args[2] - body text of the macro
+// A call with fewer than three arguments defines nothing. %def always expands
+// to an empty string.
+Evaluator::Evaluator() {
+    // `this` is captured so the callback can register the new macro on this
+    // evaluator through define().
+    defineIntrinsic("def", [this](const std::vector<std::string>& args) -> std::string {
+        if (args.size() >= 3) {
+            this->define(args[0], split(args[1], ','), args[2]);
+        }
+        return "";
+    });
+}
+
+// Stores a user-defined macro under `name`; an entry already stored under the
+// same name is overwritten.
+void Evaluator::define(const std::string& name, const std::vector<std::string>& args, const std::string& body) {
+    auto& slot = macros_[name];
+    slot = MacroDefinition(name, args, body);
+}
+
+// Stores a callback-backed (intrinsic) macro under `name`; an entry already
+// stored under the same name is overwritten.
+void Evaluator::defineIntrinsic(const std::string& name, MacroCallback callback) {
+    auto& slot = macros_[name];
+    slot = MacroDefinition(name, callback);
+}
+
+// Renders `node` to text:
+//   Text  -> its content, unchanged
+//   Macro -> the result of evaluateMacro()
+//   Group -> the concatenation of its evaluated children, in order
+// Any other node type yields an empty string.
+std::string Evaluator::evaluate(const Node& node) {
+    switch (node.type()) {
+    case NodeType::Text:
+        return static_cast<const TextNode&>(node).content;
+    case NodeType::Macro:
+        return evaluateMacro(static_cast<const MacroNode&>(node));
+    case NodeType::Group: {
+        std::string joined;
+        for (const auto& child : static_cast<const GroupNode&>(node).children) {
+            joined += evaluate(*child);
+        }
+        return joined;
+    }
+    }
+    return "";
+}
+
+// Expands one macro call and returns the resulting text.
+//
+// - Unknown macro: the call is rendered back as literal text, "%name" followed
+//   by each evaluated argument wrapped in braces.
+// - Intrinsic macro: all arguments are evaluated to strings and passed to the
+//   registered callback; its return value is the expansion.
+// - User-defined macro: every "%param" in the body is replaced by the matching
+//   evaluated argument (a missing argument becomes an empty string), then the
+//   result is parsed and evaluated so macro calls inside the body expand too.
+//
+// Parameter substitution is done in a single pass over the body and matches
+// whole identifiers only. A parameter `t` therefore does not touch "%title",
+// and text inserted for one parameter is never rescanned for another.
+std::string Evaluator::evaluateMacro(const MacroNode& macro) {
+    const auto it = macros_.find(macro.name);
+    if (it == macros_.end()) {
+        std::string literal = "%" + macro.name;
+        for (const auto& arg : macro.arguments) {
+            literal += "{" + evaluate(*arg) + "}";
+        }
+        return literal;
+    }
+
+    const MacroDefinition& def = it->second;
+
+    // Arguments are evaluated eagerly, before the macro itself runs.
+    std::vector<std::string> evaluated_args;
+    evaluated_args.reserve(macro.arguments.size());
+    for (const auto& arg : macro.arguments) {
+        evaluated_args.push_back(evaluate(*arg));
+    }
+
+    if (def.is_intrinsic) {
+        return def.callback(evaluated_args);
+    }
+
+    // Substitute parameters: copy the body, replacing each "%identifier" whose
+    // identifier equals a parameter name.
+    const std::string body = def.body;
+    std::string expanded;
+    expanded.reserve(body.size());
+    size_t pos = 0;
+    while (pos < body.size()) {
+        if (body[pos] != '%') {
+            expanded += body[pos];
+            ++pos;
+            continue;
+        }
+
+        // Read the identifier after '%' with the same character set the
+        // parser accepts for macro names.
+        size_t end = pos + 1;
+        while (end < body.size() &&
+               (std::isalnum(static_cast<unsigned char>(body[end])) || body[end] == '_')) {
+            ++end;
+        }
+        const std::string ident = body.substr(pos + 1, end - pos - 1);
+
+        bool substituted = false;
+        if (!ident.empty()) {
+            for (size_t k = 0; k < def.arg_names.size(); ++k) {
+                if (def.arg_names[k] == ident) {
+                    if (k < evaluated_args.size()) {
+                        expanded += evaluated_args[k];
+                    }
+                    substituted = true;
+                    break;
+                }
+            }
+        }
+        if (!substituted) {
+            // Not a parameter: keep the text as written (it may be a macro call).
+            expanded.append(body, pos, end - pos);
+        }
+        pos = end;
+    }
+
+    // Parse the substituted body and evaluate it so nested calls expand.
+    const auto nodes = Parser::parse(expanded);
+    std::string result;
+    for (const auto& n : nodes) {
+        result += evaluate(*n);
+    }
+    return result;
+}
+
+} // namespace macrodown
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..5379537
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,97 @@
+#include "parser.h"
+#include <iostream>
+// We will use uni-algo later for full UTF-8, for now strict byte scanning for ASCII delimiters
+// is sufficient for this skeleton as per design doc 3.3
+
+namespace macrodown {
+
+// Parses `input` into a flat sequence of nodes.
+//
+// Ordinary characters accumulate into TextNodes. A '%' followed by a name made
+// of alphanumeric characters (per isalnum) or '_' starts a MacroNode. Each
+// [...] or {...} that directly follows becomes one argument, stored as a
+// GroupNode holding the recursively parsed argument content. While looking
+// for the end of an argument only the bracket kind that opened it is counted.
+// An argument that is never closed extends to the end of the input. A '%' with
+// no name after it is kept as literal text inside the surrounding text run.
+// There is no escape syntax for '%'.
+std::vector<std::unique_ptr<Node>> Parser::parse(const std::string& input) {
+    std::vector<std::unique_ptr<Node>> nodes;
+    std::string current_text;
+
+    size_t i = 0;
+    while (i < input.length()) {
+        if (input[i] != '%') {
+            current_text += input[i];
+            i++;
+            continue;
+        }
+
+        // Read the macro name that follows '%'. The cast keeps isalnum defined
+        // for bytes >= 0x80 (e.g. UTF-8), which are negative as plain char on
+        // many platforms.
+        i++; // skip %
+        const size_t name_start = i;
+        while (i < input.length() &&
+               (isalnum(static_cast<unsigned char>(input[i])) || input[i] == '_')) {
+            i++;
+        }
+
+        if (i == name_start) {
+            // Stand-alone '%': ordinary text. The pending text is not flushed,
+            // so the surrounding run stays one TextNode.
+            current_text += '%';
+            continue;
+        }
+
+        // A real macro starts here; emit the text collected before it.
+        if (!current_text.empty()) {
+            nodes.push_back(std::make_unique<TextNode>(current_text));
+            current_text.clear();
+        }
+
+        std::string name = input.substr(name_start, i - name_start);
+        auto macro = std::make_unique<MacroNode>(name);
+
+        // Collect every argument that directly follows the name.
+        while (i < input.length() && (input[i] == '{' || input[i] == '[')) {
+            const char open = input[i];
+            const char close = (open == '{') ? '}' : ']';
+            i++; // skip the opening bracket
+
+            // Copy characters up to the matching close, tracking nesting depth.
+            std::string arg_content;
+            int balance = 1;
+            while (i < input.length() && balance > 0) {
+                const char c = input[i];
+                if (c == open) {
+                    balance++;
+                } else if (c == close) {
+                    balance--;
+                }
+                // Everything except the final closing bracket is content.
+                if (balance > 0) {
+                    arg_content += c;
+                }
+                i++;
+            }
+
+            // The argument content is itself parsed, so arguments may contain
+            // nested macro calls.
+            auto group = std::make_unique<GroupNode>();
+            std::vector<std::unique_ptr<Node>> sub_nodes = parse(arg_content);
+            for (auto& n : sub_nodes) {
+                group->addChild(std::move(n));
+            }
+            macro->arguments.push_back(std::move(group));
+        }
+
+        nodes.push_back(std::move(macro));
+    }
+
+    if (!current_text.empty()) {
+        nodes.push_back(std::make_unique<TextNode>(current_text));
+    }
+
+    return nodes;
+}
+
+} // namespace macrodown
diff --git a/tests/test_macro_engine.cpp b/tests/test_macro_engine.cpp
new file mode 100644
index 0000000..31a329f
--- /dev/null
+++ b/tests/test_macro_engine.cpp
@@ -0,0 +1,83 @@
+#include <gtest/gtest.h>
+#include "macro_engine.h"
+#include "parser.h"
+#include "nodes.h"
+
+using namespace macrodown;
+
+// Fixture for the TEST_F cases below; each case reaches an Evaluator through
+// the `evaluator` member (GoogleTest builds a new fixture object per test).
+class MacroEngineTest : public ::testing::Test {
+protected:
+    Evaluator evaluator;
+};
+
+// Input without any '%' parses to exactly one TextNode holding the input.
+TEST_F(MacroEngineTest, ParseText) {
+    auto nodes = Parser::parse("Hello World");
+    ASSERT_EQ(nodes.size(), 1u);
+    // Fatal assertion: the downcast below must not run on another node kind.
+    ASSERT_EQ(nodes[0]->type(), NodeType::Text);
+    EXPECT_EQ(static_cast<TextNode*>(nodes[0].get())->content, "Hello World");
+}
+
+// "%m{arg}" parses to one MacroNode named "m" whose single argument is a
+// GroupNode containing the text "arg".
+TEST_F(MacroEngineTest, ParseMacro) {
+    auto nodes = Parser::parse("%m{arg}");
+    ASSERT_EQ(nodes.size(), 1u);
+    // Type checks are fatal so no downcast runs on a node of the wrong kind.
+    ASSERT_EQ(nodes[0]->type(), NodeType::Macro);
+
+    auto* macro = static_cast<MacroNode*>(nodes[0].get());
+    EXPECT_EQ(macro->name, "m");
+    ASSERT_EQ(macro->arguments.size(), 1u);
+
+    // Argument should be a GroupNode
+    ASSERT_EQ(macro->arguments[0]->type(), NodeType::Group);
+    auto* group = static_cast<GroupNode*>(macro->arguments[0].get());
+    ASSERT_EQ(group->children.size(), 1u);
+    ASSERT_EQ(group->children[0]->type(), NodeType::Text);
+    EXPECT_EQ(static_cast<TextNode*>(group->children[0].get())->content, "arg");
+}
+
+// A macro defined through the intrinsic %def can be called afterwards and
+// expands with its argument substituted into the body.
+TEST_F(MacroEngineTest, DefAndExpand) {
+    // Definition: %hello{name} -> "Hello %name!"
+    const auto definition = Parser::parse("%def[hello]{name}{Hello %name!}");
+    for (const auto& node : definition) {
+        evaluator.evaluate(*node);
+    }
+
+    // Call: %hello{World}
+    std::string expanded;
+    const auto call = Parser::parse("%hello{World}");
+    for (const auto& node : call) {
+        expanded += evaluator.evaluate(*node);
+    }
+
+    EXPECT_EQ(expanded, "Hello World!");
+}
+
+// A macro call inside another macro's argument is expanded as well:
+//   %p{Hello %b{World}} -> <p>Hello <b>World</b></p>
+TEST_F(MacroEngineTest, NestedMacros) {
+    const char* const definitions[] = {
+        "%def[b]{t}{<b>%t</b>}",
+        "%def[p]{t}{<p>%t</p>}",
+    };
+    for (const char* source : definitions) {
+        const auto parsed = Parser::parse(source);
+        for (const auto& node : parsed) {
+            evaluator.evaluate(*node);
+        }
+    }
+
+    std::string expanded;
+    const auto call = Parser::parse("%p{Hello %b{World}}");
+    for (const auto& node : call) {
+        expanded += evaluator.evaluate(*node);
+    }
+
+    EXPECT_EQ(expanded, "<p>Hello <b>World</b></p>");
+}