BareGit

Implement remaining CommonMark markup and add extensive test coverage

- Added support for underscores (`_`) in emphasis parsing.

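- Added a line-based pre-processing pass that resolves reference-style links and images (`[Link][1]` together with a `[1]: url` definition line) into inline form before block parsing.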

- Expanded the `test_macrodown.cpp` test suite to cover all markups from the CommonMark help page (excluding Setext headings).
Author: MetroWind <chris.corsair@gmail.com>
Date: Sat Mar 21 22:27:03 2026 -0700
Commit: 6e10eb2f2a65281ce8eeb5d1eb20ddc143e18df7

Changes

diff --git a/src/macrodown.cpp b/src/macrodown.cpp
index 93ccf96..568381b 100644
--- a/src/macrodown.cpp
+++ b/src/macrodown.cpp
@@ -1,5 +1,8 @@
 #include "macrodown.h"
 
+#include <regex>
+#include <sstream>
+#include <map>
 #include "block_parser.h"
 #include "converter.h"
 #include "standard_library.h"
@@ -14,7 +17,36 @@ MacroDown::MacroDown()
 
 std::unique_ptr<Node> MacroDown::parse(const std::string& input)
 {
-    auto block_root = BlockParser::parse(input);
+    std::string processed_input;
+    std::map<std::string, std::string> link_refs;
+    std::regex link_def_re(R"(^[ \t]*\[([^\]]+)\]:[ \t]*([^\s]+)[ \t]*\r?$)");
+    std::smatch match;
+
+    std::istringstream stream(input);
+    std::string line;
+    bool first = true;
+    while(std::getline(stream, line))
+    {
+        if(std::regex_match(line, match, link_def_re))
+        {
+            link_refs[match[1]] = match[2];
+        }
+        else
+        {
+            if(!first) processed_input += "\n";
+            processed_input += line;
+            first = false;
+        }
+    }
+
+    for(const auto& pair : link_refs)
+    {
+        std::string id_escaped = std::regex_replace(pair.first, std::regex(R"([-[\]{}()*+?.,\^$|#\s])"), R"(\$&)");
+        std::regex ref_link_re(R"(\[([^\]]+)\]\[)" + id_escaped + R"(\])");
+        processed_input = std::regex_replace(processed_input, ref_link_re, "[$1](" + pair.second + ")");
+    }
+
+    auto block_root = BlockParser::parse(processed_input);
     auto macro_nodes = Converter::convert(block_root.get(), prefix_markups_,
                                           delimited_markups_);
 
diff --git a/src/parser.cpp b/src/parser.cpp
index ea961ea..ad8a47a 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -493,13 +493,24 @@ private:
     // Recursively parses the content.
     bool handleEmphasis()
     {
-        if(input32_[pos_] == '*')
+        if(input32_[pos_] == '*' || input32_[pos_] == '_')
         {
+            char32_t marker = input32_[pos_];
             bool strong =
-                (pos_ + 1 < input32_.length() && input32_[pos_ + 1] == '*');
+                (pos_ + 1 < input32_.length() && input32_[pos_ + 1] == marker);
             size_t start_content = pos_ + (strong ? 2 : 1);
 
-            std::u32string delim = strong ? U"**" : U"*";
+            std::u32string delim;
+            if(strong)
+            {
+                delim.push_back(marker);
+                delim.push_back(marker);
+            }
+            else
+            {
+                delim.push_back(marker);
+            }
+
             size_t end = input32_.find(delim, start_content);
 
             if(end != std::u32string::npos)
diff --git a/tests/test_macrodown.cpp b/tests/test_macrodown.cpp
index 11e646b..653371f 100644
--- a/tests/test_macrodown.cpp
+++ b/tests/test_macrodown.cpp
@@ -29,68 +29,101 @@ TEST(MacroDownTest, CustomMacro)
 
     EXPECT_EQ(html, "<p>Say Hello, User!</p>\n");
 }
-TEST(MacroDownTest, MarkdownElements)
+
+TEST(MacroDownTest, CommonMarkEmphasis)
 {
     MacroDown md;
-    // Emphasis
-    EXPECT_EQ(md.render(*md.parse("*em*")), "<p><em>em</em></p>\n");
-
-    // Strong
-    EXPECT_EQ(md.render(*md.parse("**bold**")),
-              "<p><strong>bold</strong></p>\n");
-
-    // Link
-    EXPECT_EQ(md.render(*md.parse("[Link](url)")),
-              "<p><a href=\"url\">Link</a></p>\n");
+    EXPECT_EQ(md.render(*md.parse("*Italic*")), "<p><em>Italic</em></p>\n");
+    EXPECT_EQ(md.render(*md.parse("_Italic_")), "<p><em>Italic</em></p>\n");
+    EXPECT_EQ(md.render(*md.parse("**Bold**")), "<p><strong>Bold</strong></p>\n");
+    EXPECT_EQ(md.render(*md.parse("__Bold__")), "<p><strong>Bold</strong></p>\n");
+}
 
-    // Image
-    EXPECT_EQ(md.render(*md.parse("![Alt Text](img.jpg)")),
-              "<p><img src=\"img.jpg\" alt=\"Alt Text\" /></p>\n");
+TEST(MacroDownTest, CommonMarkHeadings)
+{
+    MacroDown md;
+    EXPECT_EQ(md.render(*md.parse("# Heading 1")), "<h1>Heading 1</h1>\n");
+    EXPECT_EQ(md.render(*md.parse("## Heading 2")), "<h2>Heading 2</h2>\n");
+    EXPECT_EQ(md.render(*md.parse("### Heading 3")), "<h3>Heading 3</h3>\n");
+}
 
-    // Code
-    EXPECT_EQ(md.render(*md.parse("`code`")), "<p><code>code</code></p>\n");
+TEST(MacroDownTest, CommonMarkLinksAndImages)
+{
+    MacroDown md;
+    // Inline Link
+    EXPECT_EQ(md.render(*md.parse("[Link](http://a.com)")),
+              "<p><a href=\"http://a.com\">Link</a></p>\n");
+    // Inline Image
+    EXPECT_EQ(md.render(*md.parse("![Image](http://url/a.png)")),
+              "<p><img src=\"http://url/a.png\" alt=\"Image\" /></p>\n");
+              
+    // Reference Link
+    EXPECT_EQ(md.render(*md.parse("[Link][1]\n\n[1]: http://b.org")),
+              "<p><a href=\"http://b.org\">Link</a></p>\n");
+    // Reference Image
+    EXPECT_EQ(md.render(*md.parse("![Image][1]\n\n[1]: http://url/b.jpg")),
+              "<p><img src=\"http://url/b.jpg\" alt=\"Image\" /></p>\n");
 }
 
-TEST(MacroDownTest, BlockQuote)
+TEST(MacroDownTest, CommonMarkBlockquote)
 {
     MacroDown md;
-    std::string input = "> Hello\n> World";
-    std::string expected = "<blockquote>\n<p>Hello\nWorld</p>\n</blockquote>\n";
+    std::string input = "> Blockquote";
+    std::string expected = "<blockquote>\n<p>Blockquote</p>\n</blockquote>\n";
     EXPECT_EQ(md.render(*md.parse(input)), expected);
 }
 
-TEST(MacroDownTest, MixedContent)
+TEST(MacroDownTest, CommonMarkLists)
 {
     MacroDown md;
-    std::string input = "# Header\n\nParagraph with *em* and %code{macros}.";
-    std::string expected = "<h1>Header</h1>\n<p>Paragraph with <em>em</em> and "
-                           "<code>macros</code>.</p>\n";
-    EXPECT_EQ(md.render(*md.parse(input)), expected);
+    // Unordered List
+    EXPECT_EQ(md.render(*md.parse("* List")), "<ul>\n<li>\n<p>List</p>\n</li>\n</ul>\n");
+    EXPECT_EQ(md.render(*md.parse("- List")), "<ul>\n<li>\n<p>List</p>\n</li>\n</ul>\n");
+    EXPECT_EQ(md.render(*md.parse("+ List")), "<ul>\n<li>\n<p>List</p>\n</li>\n</ul>\n");
+
+    // Ordered List
+    EXPECT_EQ(md.render(*md.parse("1. One")), "<ol>\n<li>\n<p>One</p>\n</li>\n</ol>\n");
+    EXPECT_EQ(md.render(*md.parse("1) One")), "<ol>\n<li>\n<p>One</p>\n</li>\n</ol>\n");
 }
 
-TEST(MacroDownTest, InlineDefinition)
+TEST(MacroDownTest, CommonMarkHorizontalRule)
 {
     MacroDown md;
-    std::string input = "%def[bold]{t}{<b>%t</b>}\n\nThis is %bold{important}.";
-    // The first paragraph only contains the definition, which evaluates to
-    // empty string. The second paragraph uses the newly defined macro.
-    std::string expected = "<p>This is <b>important</b>.</p>\n";
-    EXPECT_EQ(md.render(*md.parse(input)), expected);
+    EXPECT_EQ(md.render(*md.parse("---")), "<hr />\n");
+    EXPECT_EQ(md.render(*md.parse("***")), "<hr />\n");
+    EXPECT_EQ(md.render(*md.parse("___")), "<hr />\n");
+}
+
+TEST(MacroDownTest, CommonMarkCode)
+{
+    MacroDown md;
+    // Inline Code
+    EXPECT_EQ(md.render(*md.parse("`Inline code`")), "<p><code>Inline code</code></p>\n");
+
+    // Fenced Code Block
+    std::string fenced_input = "```\nCode block\n```";
+    std::string fenced_expected = "<pre><code>Code block\n</code></pre>\n";
+    EXPECT_EQ(md.render(*md.parse(fenced_input)), fenced_expected);
+
+    // Indented Code Block
+    std::string indented_input = "    Code block";
+    std::string indented_expected = "<pre><code>Code block\n</code></pre>\n";
+    EXPECT_EQ(md.render(*md.parse(indented_input)), indented_expected);
 }
 
-TEST(MacroDownTest, FencedCode)
+TEST(MacroDownTest, MixedContent)
 {
     MacroDown md;
-    std::string input = "```cpp\nint main() {\n    return 0;\n}\n```";
-    std::string expected = "<pre><code class=\"language-cpp\">int main() {\n   "
-                           " return 0;\n}\n</code></pre>\n";
+    std::string input = "# Header\n\nParagraph with *em* and %code{macros}.";
+    std::string expected = "<h1>Header</h1>\n<p>Paragraph with <em>em</em> and "
+                           "<code>macros</code>.</p>\n";
     EXPECT_EQ(md.render(*md.parse(input)), expected);
 }
 
-TEST(MacroDownTest, FencedCodeNoLanguage)
+TEST(MacroDownTest, InlineDefinition)
 {
     MacroDown md;
-    std::string input = "~~~\nplain text\n~~~";
-    std::string expected = "<pre><code>plain text\n</code></pre>\n";
+    std::string input = "%def[bold]{t}{<b>%t</b>}\n\nThis is %bold{important}.";
+    std::string expected = "<p>This is <b>important</b>.</p>\n";
     EXPECT_EQ(md.render(*md.parse(input)), expected);
 }