Changes
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..28faefc
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,26 @@
+Language: Cpp
+BasedOnStyle: LLVM
+IndentWidth: 4
+TabWidth: 4
+UseTab: Never
+ColumnLimit: 80
+BreakBeforeBraces: Allman
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortBlocksOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: WithoutElse
+IndentCaseLabels: false
+SpaceBeforeParens: Never
+PointerAlignment: Left
+DerivePointerAlignment: false
+Standard: Latest
+IncludeCategories:
+ - Regex: '^<[a-z0-9_]+>$'
+ Priority: 1
+ - Regex: '^<.*>'
+ Priority: 2
+ - Regex: '^".*"'
+ Priority: 3
+IncludeBlocks: Regroup
+SortIncludes: true
+InsertBraces: true
+AccessModifierOffset: -4
diff --git a/src/block_parser.cpp b/src/block_parser.cpp
index f2c0916..3a13c33 100644
--- a/src/block_parser.cpp
+++ b/src/block_parser.cpp
@@ -1,7 +1,8 @@
#include "block_parser.h"
-#include <sstream>
-#include <iostream>
+
#include <algorithm>
+#include <iostream>
+#include <sstream>
namespace macrodown
{
@@ -12,7 +13,8 @@ namespace
// Helper: check if line is blank (only spaces)
bool is_blank(const std::string& line)
{
- return std::all_of(line.begin(), line.end(), [](unsigned char c){ return std::isspace(c); });
+ return std::all_of(line.begin(), line.end(),
+ [](unsigned char c) { return std::isspace(c); });
}
// Helper: count indentation
@@ -39,7 +41,7 @@ std::unique_ptr<Block> BlockParser::parse(const std::string& input)
BlockParser parser;
std::istringstream stream(input);
std::string line;
-
+
while(std::getline(stream, line))
{
// Remove \r if present (Windows line endings)
@@ -49,9 +51,10 @@ std::unique_ptr<Block> BlockParser::parse(const std::string& input)
}
parser.process_line(line);
}
-
+
// Close all remaining blocks
- parser.close_unmatched_blocks(0); // 0 means keep only root, but since we are done, close everything
+ parser.close_unmatched_blocks(
+ 0); // 0 means keep only root, but since we are done, close everything
// Actually we don't "close" root in the stack sense, but we mark open=false
parser.root->open = false;
@@ -60,10 +63,8 @@ std::unique_ptr<Block> BlockParser::parse(const std::string& input)
bool BlockParser::is_container(BlockType type)
{
- return type == BlockType::Document ||
- type == BlockType::Quote ||
- type == BlockType::List ||
- type == BlockType::ListItem;
+ return type == BlockType::Document || type == BlockType::Quote ||
+ type == BlockType::List || type == BlockType::ListItem;
}
// Determines if an open block matches the current line
@@ -74,7 +75,7 @@ bool BlockParser::matches(Block* block, const std::string& line, size_t& offset)
{
return true; // Document always matches
}
-
+
if(block->type == BlockType::Quote)
{
size_t indent = count_indent(line, offset);
@@ -83,50 +84,62 @@ bool BlockParser::matches(Block* block, const std::string& line, size_t& offset)
if(offset + indent < line.size() && line[offset + indent] == '>')
{
offset += indent + 1; // Consume indent and '>'
- if(offset < line.size() && line[offset] == ' ') offset++; // Optional space
+ if(offset < line.size() && line[offset] == ' ')
+ {
+ offset++; // Optional space
+ }
return true;
}
}
return false;
}
-
+
if(block->type == BlockType::Paragraph)
{
- if(is_blank(line)) return false;
-
+ if(is_blank(line))
+ {
+ return false;
+ }
+
// Paragraphs match unless interrupted by a new block type
size_t indent = count_indent(line, offset);
if(indent < 4)
{
// Check for BlockQuote
- if(offset + indent < line.size() && line[offset + indent] == '>') return false;
-
+ if(offset + indent < line.size() && line[offset + indent] == '>')
+ {
+ return false;
+ }
+
// Check for ATX Heading
size_t check_pos = offset + indent;
if(check_pos < line.size() && line[check_pos] == '#')
{
- // Confirm it's a heading (sequence of # followed by space or end)
+ // Confirm it's a heading (sequence of # followed by space or
+ // end)
size_t hash_count = 0;
- while(check_pos + hash_count < line.size() && line[check_pos + hash_count] == '#' && hash_count < 6)
+ while(check_pos + hash_count < line.size() &&
+ line[check_pos + hash_count] == '#' && hash_count < 6)
{
hash_count++;
}
- if(hash_count > 0 && (check_pos + hash_count == line.size() || line[check_pos + hash_count] == ' '))
+ if(hash_count > 0 && (check_pos + hash_count == line.size() ||
+ line[check_pos + hash_count] == ' '))
{
return false;
}
}
}
-
+
// It's a continuation
return true;
}
-
+
if(block->type == BlockType::FencedCode)
{
return true;
}
-
+
return false;
}
@@ -134,7 +147,7 @@ void BlockParser::process_line(const std::string& line)
{
size_t offset = 0;
size_t matches_count = 0;
-
+
// 1. Find matches in open blocks
matches_count = 0; // Root always matches
for(size_t i = 1; i < open_blocks.size(); ++i)
@@ -151,19 +164,23 @@ void BlockParser::process_line(const std::string& line)
// 2. Close unmatched blocks
close_unmatched_blocks(matches_count);
-
+
// 3. Open new blocks
// Scan rest of line (at offset)
-
+
// Check for BlockQuote
while(true)
{
size_t indent = count_indent(line, offset);
- if(indent < 4 && offset + indent < line.size() && line[offset + indent] == '>')
+ if(indent < 4 && offset + indent < line.size() &&
+ line[offset + indent] == '>')
{
offset += indent + 1;
- if(offset < line.size() && line[offset] == ' ') offset++;
-
+ if(offset < line.size() && line[offset] == ' ')
+ {
+ offset++;
+ }
+
auto new_block = std::make_unique<Block>(BlockType::Quote);
Block* ptr = new_block.get();
open_blocks.back().block->children.push_back(std::move(new_block));
@@ -174,11 +191,11 @@ void BlockParser::process_line(const std::string& line)
break;
}
}
-
+
// 4. Handle Leaf Blocks (Heading, ThematicBreak) or continuation
-
+
Block* tip = open_blocks.back().block;
-
+
// Check if we are inside a FencedCode block
if(tip->type == BlockType::FencedCode)
{
@@ -187,12 +204,19 @@ void BlockParser::process_line(const std::string& line)
{
size_t check_pos = offset + indent;
size_t fence_len = 0;
- while(check_pos + fence_len < line.size() && line[check_pos + fence_len] == tip->fence_char) fence_len++;
+ while(check_pos + fence_len < line.size() &&
+ line[check_pos + fence_len] == tip->fence_char)
+ {
+ fence_len++;
+ }
if(fence_len >= tip->fence_length)
{
// Verify no trailing characters other than space
size_t trail = check_pos + fence_len;
- while(trail < line.size() && line[trail] == ' ') trail++;
+ while(trail < line.size() && line[trail] == ' ')
+ {
+ trail++;
+ }
if(trail == line.size())
{
// Closing fence
@@ -202,31 +226,46 @@ void BlockParser::process_line(const std::string& line)
}
}
// Add line to content
- if(!tip->literal_content.empty()) tip->literal_content += "\n";
+ if(!tip->literal_content.empty())
+ {
+ tip->literal_content += "\n";
+ }
tip->literal_content += line.substr(offset);
return;
}
-
+
// Check for FencedCode Opening
size_t indent = count_indent(line, offset);
if(indent < 4)
{
size_t check_pos = offset + indent;
- if(check_pos < line.size() && (line[check_pos] == '`' || line[check_pos] == '~'))
+ if(check_pos < line.size() &&
+ (line[check_pos] == '`' || line[check_pos] == '~'))
{
char fence_char = line[check_pos];
size_t fence_len = 0;
- while(check_pos + fence_len < line.size() && line[check_pos + fence_len] == fence_char) fence_len++;
-
+ while(check_pos + fence_len < line.size() &&
+ line[check_pos + fence_len] == fence_char)
+ {
+ fence_len++;
+ }
+
if(fence_len >= 3)
{
size_t info_start = check_pos + fence_len;
- while(info_start < line.size() && line[info_start] == ' ') info_start++;
+ while(info_start < line.size() && line[info_start] == ' ')
+ {
+ info_start++;
+ }
std::string info_string = line.substr(info_start);
-
+
bool valid = true;
- if(fence_char == '`' && info_string.find('`') != std::string::npos) valid = false;
-
+ if(fence_char == '`' &&
+ info_string.find('`') != std::string::npos)
+ {
+ valid = false;
+ }
+
if(valid)
{
if(tip->type == BlockType::Paragraph)
@@ -234,12 +273,13 @@ void BlockParser::process_line(const std::string& line)
close_unmatched_blocks(open_blocks.size() - 2);
tip = open_blocks.back().block;
}
-
- auto code_block = std::make_unique<Block>(BlockType::FencedCode);
+
+ auto code_block =
+ std::make_unique<Block>(BlockType::FencedCode);
code_block->fence_char = fence_char;
code_block->fence_length = fence_len;
code_block->info_string = info_string;
-
+
Block* ptr = code_block.get();
tip->children.push_back(std::move(code_block));
open_blocks.push_back({ptr});
@@ -258,23 +298,25 @@ void BlockParser::process_line(const std::string& line)
return;
}
// Else, it's a continuation
- // (Unless it's interrupted by a Heading/Quote etc. - Simplified: we assume it continues)
- // Strictly, we should check if the line *starts* a new block (like Header)
- // If it does, we close the paragraph.
+ // (Unless it's interrupted by a Heading/Quote etc. - Simplified: we
+ // assume it continues.) Strictly, we should check if the line *starts* a
+ // new block (like a heading); if it does, we close the paragraph.
}
-
+
// Check for ATX Heading
indent = count_indent(line, offset);
if(indent < 4)
{
size_t check_pos = offset + indent;
size_t hash_count = 0;
- while(check_pos + hash_count < line.size() && line[check_pos + hash_count] == '#' && hash_count < 6)
+ while(check_pos + hash_count < line.size() &&
+ line[check_pos + hash_count] == '#' && hash_count < 6)
{
hash_count++;
}
-
- if(hash_count > 0 && (check_pos + hash_count == line.size() || line[check_pos + hash_count] == ' '))
+
+ if(hash_count > 0 && (check_pos + hash_count == line.size() ||
+ line[check_pos + hash_count] == ' '))
{
// Found Heading
// If we were in a paragraph, close it
@@ -283,38 +325,42 @@ void BlockParser::process_line(const std::string& line)
close_unmatched_blocks(open_blocks.size() - 2);
tip = open_blocks.back().block;
}
-
+
auto heading = std::make_unique<Block>(BlockType::Heading);
heading->level = hash_count;
// Content is the rest of the line (trimmed)
size_t content_start = check_pos + hash_count;
- while(content_start < line.size() && line[content_start] == ' ') content_start++;
+ while(content_start < line.size() && line[content_start] == ' ')
+ {
+ content_start++;
+ }
heading->literal_content = line.substr(content_start);
// Remove trailing hashes? CommonMark says yes. Optional for now.
heading->open = false; // Headings are single line
-
+
tip->children.push_back(std::move(heading));
return;
}
}
-
+
// 5. Finalize: Text or Paragraph
if(is_blank(line))
{
return; // Ignore blank lines if not ending a paragraph
}
-
- if(tip->type == BlockType::Document || tip->type == BlockType::Quote || tip->type == BlockType::List || tip->type == BlockType::ListItem)
+
+ if(tip->type == BlockType::Document || tip->type == BlockType::Quote ||
+ tip->type == BlockType::List || tip->type == BlockType::ListItem)
{
// Create new Paragraph
auto p = std::make_unique<Block>(BlockType::Paragraph);
Block* p_ptr = p.get();
tip->children.push_back(std::move(p));
open_blocks.push_back({p_ptr});
-
+
// Add text
- // Note: indentation in paragraph text is preserved but leading spaces of the first line?
- // CommonMark: stripped.
+ // Note: indentation in paragraph text is preserved, but the leading
+ // spaces of the first line are stripped (as CommonMark specifies).
size_t content_start = offset + count_indent(line, offset);
p_ptr->literal_content = line.substr(content_start);
}
diff --git a/src/converter.cpp b/src/converter.cpp
index 3c208be..0cf55aa 100644
--- a/src/converter.cpp
+++ b/src/converter.cpp
@@ -1,23 +1,26 @@
#include "converter.h"
-#include "parser.h"
-#include "macrodown.h"
+
#include <iostream>
+#include "macrodown.h"
+#include "parser.h"
+
namespace macrodown
{
-std::vector<std::unique_ptr<Node>> Converter::convert(
- const Block* root,
- const std::vector<PrefixMarkup>& prefix_markups,
- const std::vector<DelimitedMarkup>& delimited_markups)
+std::vector<std::unique_ptr<Node>>
+Converter::convert(const Block* root,
+ const std::vector<PrefixMarkup>& prefix_markups,
+ const std::vector<DelimitedMarkup>& delimited_markups)
{
std::vector<std::unique_ptr<Node>> nodes;
-
+
if(root->type == BlockType::Document)
{
for(const auto& child : root->children)
{
- auto node = convert_block(child.get(), prefix_markups, delimited_markups);
+ auto node =
+ convert_block(child.get(), prefix_markups, delimited_markups);
if(node)
{
nodes.push_back(std::move(node));
@@ -28,37 +31,43 @@ std::vector<std::unique_ptr<Node>> Converter::convert(
{
// Should not happen if root is Document, but handle single block
auto node = convert_block(root, prefix_markups, delimited_markups);
- if(node) nodes.push_back(std::move(node));
+ if(node)
+ {
+ nodes.push_back(std::move(node));
+ }
}
-
+
return nodes;
}
-std::unique_ptr<Node> Converter::convert_block(
- const Block* block,
- const std::vector<PrefixMarkup>& prefix_markups,
- const std::vector<DelimitedMarkup>& delimited_markups)
+std::unique_ptr<Node>
+Converter::convert_block(const Block* block,
+ const std::vector<PrefixMarkup>& prefix_markups,
+ const std::vector<DelimitedMarkup>& delimited_markups)
{
- if(!block) return nullptr;
+ if(!block)
+ {
+ return nullptr;
+ }
std::string macro_name;
switch(block->type)
{
- case BlockType::Paragraph:
- macro_name = "p";
- break;
- case BlockType::Heading:
- macro_name = "h" + std::to_string(block->level);
- break;
- case BlockType::Quote:
- macro_name = "quote";
- break;
- case BlockType::FencedCode:
- macro_name = "fenced_code";
- break;
- default:
- return nullptr; // Ignore unknown blocks for now
+ case BlockType::Paragraph:
+ macro_name = "p";
+ break;
+ case BlockType::Heading:
+ macro_name = "h" + std::to_string(block->level);
+ break;
+ case BlockType::Quote:
+ macro_name = "quote";
+ break;
+ case BlockType::FencedCode:
+ macro_name = "fenced_code";
+ break;
+ default:
+ return nullptr; // Ignore unknown blocks for now
}
Macro macro;
@@ -69,18 +78,22 @@ std::unique_ptr<Node> Converter::convert_block(
// Arg 1: Info string
Group info_group;
info_group.addChild(std::make_unique<Node>(Text{block->info_string}));
- macro.arguments.push_back(std::make_unique<Node>(std::move(info_group)));
-
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(info_group)));
+
// Arg 2: Content (unparsed)
Group content_group;
- content_group.addChild(std::make_unique<Node>(Text{block->literal_content}));
- macro.arguments.push_back(std::make_unique<Node>(std::move(content_group)));
+ content_group.addChild(
+ std::make_unique<Node>(Text{block->literal_content}));
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(content_group)));
}
else if(block->children.empty())
{
// Leaf block: Parse literal content
Group group;
- auto inline_nodes = Parser::parse(block->literal_content, prefix_markups, delimited_markups);
+ auto inline_nodes = Parser::parse(block->literal_content,
+ prefix_markups, delimited_markups);
for(auto& n : inline_nodes)
{
group.addChild(std::move(n));
@@ -93,7 +106,8 @@ std::unique_ptr<Node> Converter::convert_block(
Group group;
for(const auto& child : block->children)
{
- auto child_node = convert_block(child.get(), prefix_markups, delimited_markups);
+ auto child_node =
+ convert_block(child.get(), prefix_markups, delimited_markups);
if(child_node)
{
group.addChild(std::move(child_node));
diff --git a/src/macro_engine.cpp b/src/macro_engine.cpp
index 4016a50..3add8de 100644
--- a/src/macro_engine.cpp
+++ b/src/macro_engine.cpp
@@ -1,10 +1,12 @@
#include "macro_engine.h"
-#include "parser.h"
+
+#include <iostream>
#include <sstream>
#include <stdexcept>
-#include <iostream>
#include <variant>
+#include "parser.h"
+
namespace macrodown
{
@@ -21,7 +23,10 @@ std::vector<std::string> split(const std::string& s, char delimiter)
{
// Trim whitespace
size_t first = token.find_first_not_of(" \t");
- if(first == std::string::npos) continue;
+ if(first == std::string::npos)
+ {
+ continue;
+ }
size_t last = token.find_last_not_of(" \t");
tokens.push_back(token.substr(first, (last - first + 1)));
}
@@ -29,7 +34,8 @@ std::vector<std::string> split(const std::string& s, char delimiter)
}
// Helper to replace all occurrences of a substring
-std::string replace_all(std::string str, const std::string& from, const std::string& to)
+std::string replace_all(std::string str, const std::string& from,
+ const std::string& to)
{
size_t start_pos = 0;
while((start_pos = str.find(from, start_pos)) != std::string::npos)
@@ -45,25 +51,29 @@ std::string replace_all(std::string str, const std::string& from, const std::str
Evaluator::Evaluator()
{
// Register intrinsic %def macro
- defineIntrinsic("def", [this](const std::vector<std::string>& args) -> std::string
- {
- if(args.size() < 3)
- {
- return "";
- }
-
- std::string name = args[0];
- std::string arg_list_str = args[1];
- std::string body = args[2];
-
- std::vector<std::string> arg_names = split(arg_list_str, ',');
-
- this->define(name, arg_names, body);
- return "";
- });
+ defineIntrinsic("def",
+ [this](const std::vector<std::string>& args) -> std::string
+ {
+ if(args.size() < 3)
+ {
+ return "";
+ }
+
+ std::string name = args[0];
+ std::string arg_list_str = args[1];
+ std::string body = args[2];
+
+ std::vector<std::string> arg_names =
+ split(arg_list_str, ',');
+
+ this->define(name, arg_names, body);
+ return "";
+ });
}
-void Evaluator::define(const std::string& name, const std::vector<std::string>& args, const std::string& body)
+void Evaluator::define(const std::string& name,
+ const std::vector<std::string>& args,
+ const std::string& body)
{
macros_[name] = MacroDefinition(name, args, body);
}
@@ -75,28 +85,30 @@ void Evaluator::defineIntrinsic(const std::string& name, MacroCallback callback)
std::string Evaluator::evaluate(const Node& node)
{
- return std::visit([this](auto&& arg) -> std::string
- {
- using T = std::decay_t<decltype(arg)>;
- if constexpr (std::is_same_v<T, Text>)
- {
- return arg.content;
- }
- else if constexpr (std::is_same_v<T, Macro>)
+ return std::visit(
+ [this](auto&& arg) -> std::string
{
- return this->evaluateMacro(arg);
- }
- else if constexpr (std::is_same_v<T, Group>)
- {
- std::string result;
- for(const auto& child : arg.children)
+ using T = std::decay_t<decltype(arg)>;
+ if constexpr(std::is_same_v<T, Text>)
{
- result += this->evaluate(*child);
+ return arg.content;
}
- return result;
- }
- return "";
- }, node.data);
+ else if constexpr(std::is_same_v<T, Macro>)
+ {
+ return this->evaluateMacro(arg);
+ }
+ else if constexpr(std::is_same_v<T, Group>)
+ {
+ std::string result;
+ for(const auto& child : arg.children)
+ {
+ result += this->evaluate(*child);
+ }
+ return result;
+ }
+ return "";
+ },
+ node.data);
}
std::string Evaluator::evaluateMacro(const Macro& macro)
@@ -107,7 +119,7 @@ std::string Evaluator::evaluateMacro(const Macro& macro)
std::string result = "%" + macro.name;
for(const auto& arg : macro.arguments)
{
- result += "{" + evaluate(*arg) + "}";
+ result += "{" + evaluate(*arg) + "}";
}
return result;
}
@@ -127,16 +139,17 @@ std::string Evaluator::evaluateMacro(const Macro& macro)
else
{
std::string body = def.body;
-
+
for(size_t i = 0; i < def.arg_names.size(); ++i)
{
std::string placeholder = "%" + def.arg_names[i];
- std::string value = (i < evaluated_args.size()) ? evaluated_args[i] : "";
+ std::string value =
+ (i < evaluated_args.size()) ? evaluated_args[i] : "";
body = replace_all(body, placeholder, value);
}
-
+
auto nodes = Parser::parse(body);
-
+
std::string result;
for(const auto& n : nodes)
{
diff --git a/src/macrodown.cpp b/src/macrodown.cpp
index f96e574..93ccf96 100644
--- a/src/macrodown.cpp
+++ b/src/macrodown.cpp
@@ -1,4 +1,5 @@
#include "macrodown.h"
+
#include "block_parser.h"
#include "converter.h"
#include "standard_library.h"
@@ -14,7 +15,8 @@ MacroDown::MacroDown()
std::unique_ptr<Node> MacroDown::parse(const std::string& input)
{
auto block_root = BlockParser::parse(input);
- auto macro_nodes = Converter::convert(block_root.get(), prefix_markups_, delimited_markups_);
+ auto macro_nodes = Converter::convert(block_root.get(), prefix_markups_,
+ delimited_markups_);
if(macro_nodes.empty())
{
diff --git a/src/main.cpp b/src/main.cpp
index 548bcea..11e5c27 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,6 +1,7 @@
-#include <iostream>
#include <fstream>
+#include <iostream>
#include <sstream>
+
#include "macrodown.h"
using namespace macrodown;
diff --git a/src/parser.cpp b/src/parser.cpp
index e159661..ea961ea 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -1,9 +1,11 @@
#include "parser.h"
-#include "macrodown.h"
-#include "uni_algo/all.h"
+
#include <iostream>
-#include <regex>
#include <optional>
+#include <regex>
+
+#include "macrodown.h"
+#include "uni_algo/all.h"
namespace macrodown
{
@@ -34,22 +36,27 @@ public:
InlineParser(const std::string& input,
const std::vector<PrefixMarkup>& prefix_markups,
const std::vector<DelimitedMarkup>& delimited_markups)
- : input32_(una::utf8to32u(input)),
- prefix_markups_(prefix_markups),
+ : input32_(una::utf8to32u(input)), prefix_markups_(prefix_markups),
delimited_markups_(delimited_markups)
{
// Pre-process markup definitions into lookup tables for efficiency
- for (const auto& m : prefix_markups_)
+ for(const auto& m : prefix_markups_)
{
auto cp = una::utf8to32u(m.prefix);
- if (!cp.empty())
- p_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
+ if(!cp.empty())
+ {
+ p_infos_.push_back(
+ {cp[0], m.macro_name, std::regex(m.pattern)});
+ }
}
- for (const auto& m : delimited_markups_)
+ for(const auto& m : delimited_markups_)
{
auto cp = una::utf8to32u(m.delimiter);
- if (!cp.empty())
- d_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
+ if(!cp.empty())
+ {
+ d_infos_.push_back(
+ {cp[0], m.macro_name, std::regex(m.pattern)});
+ }
}
}
@@ -58,16 +65,40 @@ public:
// position. Otherwise, the current character is treated as plain text.
std::vector<std::unique_ptr<Node>> parse()
{
- while (pos_ < input32_.length())
+ while(pos_ < input32_.length())
{
- if (handleEscape()) continue;
- if (handlePrefixMarkup()) continue;
- if (handleDelimitedMarkup()) continue;
- if (handleMacro()) continue;
- if (handleCode()) continue;
- if (handleImage()) continue;
- if (handleLink()) continue;
- if (handleEmphasis()) continue;
+ if(handleEscape())
+ {
+ continue;
+ }
+ if(handlePrefixMarkup())
+ {
+ continue;
+ }
+ if(handleDelimitedMarkup())
+ {
+ continue;
+ }
+ if(handleMacro())
+ {
+ continue;
+ }
+ if(handleCode())
+ {
+ continue;
+ }
+ if(handleImage())
+ {
+ continue;
+ }
+ if(handleLink())
+ {
+ continue;
+ }
+ if(handleEmphasis())
+ {
+ continue;
+ }
current_text_ += input32_[pos_];
pos_++;
@@ -91,9 +122,10 @@ private:
// Pushes accumulated plain text into the node list as a Text node.
void flushText()
{
- if (!current_text_.empty())
+ if(!current_text_.empty())
{
- nodes_.push_back(std::make_unique<Node>(Text{una::utf32to8(current_text_)} ));
+ nodes_.push_back(
+ std::make_unique<Node>(Text{una::utf32to8(current_text_)}));
current_text_.clear();
}
}
@@ -102,7 +134,7 @@ private:
// following character literally to the text buffer.
bool handleEscape()
{
- if (input32_[pos_] == '\\' && pos_ + 1 < input32_.length())
+ if(input32_[pos_] == '\\' && pos_ + 1 < input32_.length())
{
current_text_ += input32_[pos_ + 1];
pos_ += 2;
@@ -116,41 +148,53 @@ private:
// (whitespace or punctuation, excluding underscore).
bool handlePrefixMarkup()
{
- for (const auto& info : p_infos_)
+ for(const auto& info : p_infos_)
{
- if (input32_[pos_] == info.cp)
+ if(input32_[pos_] == info.cp)
{
size_t j = pos_ + 1;
- while (j < input32_.length())
+ while(j < input32_.length())
{
char32_t next_c = input32_[j];
- if (una::codepoint::is_whitespace(next_c)) break;
- if (next_c != '_' && next_c != '-' && next_c != '@' && next_c != '.' && una::codepoint::prop{next_c}.General_Category_P()) break;
+ if(una::codepoint::is_whitespace(next_c))
+ {
+ break;
+ }
+ if(next_c != '_' && next_c != '-' && next_c != '@' &&
+ next_c != '.' &&
+ una::codepoint::prop{next_c}.General_Category_P())
+ {
+ break;
+ }
j++;
}
- if (j > pos_ + 1)
+ if(j > pos_ + 1)
{
- // If the last character was a dot, and it is followed by whitespace or EOF,
- // exclude it from the markup.
- if (input32_[j - 1] == '.')
+ // If the last character was a dot, and it is followed by
+ // whitespace or EOF, exclude it from the markup.
+ if(input32_[j - 1] == '.')
{
- if (j == input32_.length() || una::codepoint::is_whitespace(input32_[j]))
+ if(j == input32_.length() ||
+ una::codepoint::is_whitespace(input32_[j]))
{
j--;
}
}
}
- if (j > pos_ + 1)
+ if(j > pos_ + 1)
{
flushText();
- std::u32string content = input32_.substr(pos_ + 1, j - (pos_ + 1));
+ std::u32string content =
+ input32_.substr(pos_ + 1, j - (pos_ + 1));
Macro macro;
macro.name = info.macro;
Group group;
- group.addChild(std::make_unique<Node>(Text{una::utf32to8(content)}));
- macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
+ group.addChild(
+ std::make_unique<Node>(Text{una::utf32to8(content)}));
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(group)));
nodes_.push_back(std::make_unique<Node>(std::move(macro)));
pos_ = j;
return true;
@@ -165,27 +209,28 @@ private:
// Enforces strict rules: no whitespace inside, no punctuation boundaries.
bool handleDelimitedMarkup()
{
- for (const auto& info : d_infos_)
+ for(const auto& info : d_infos_)
{
- if (input32_[pos_] == info.cp)
+ if(input32_[pos_] == info.cp)
{
size_t j = pos_ + 1;
bool valid = true;
bool found_end = false;
- while (j < input32_.length())
+ while(j < input32_.length())
{
char32_t next_c = input32_[j];
- if (next_c == info.cp)
+ if(next_c == info.cp)
{
found_end = true;
break;
}
- if (una::codepoint::is_whitespace(next_c))
+ if(una::codepoint::is_whitespace(next_c))
{
valid = false;
break;
}
- if (next_c != '_' && next_c != '-' && una::codepoint::prop{next_c}.General_Category_P())
+ if(next_c != '_' && next_c != '-' &&
+ una::codepoint::prop{next_c}.General_Category_P())
{
valid = false;
break;
@@ -193,15 +238,18 @@ private:
j++;
}
- if (found_end && valid && j > pos_ + 1)
+ if(found_end && valid && j > pos_ + 1)
{
flushText();
- std::u32string content = input32_.substr(pos_ + 1, j - (pos_ + 1));
+ std::u32string content =
+ input32_.substr(pos_ + 1, j - (pos_ + 1));
Macro macro;
macro.name = info.macro;
Group group;
- group.addChild(std::make_unique<Node>(Text{una::utf32to8(content)}));
- macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
+ group.addChild(
+ std::make_unique<Node>(Text{una::utf32to8(content)}));
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(group)));
nodes_.push_back(std::make_unique<Node>(std::move(macro)));
pos_ = j + 1;
return true;
@@ -212,52 +260,59 @@ private:
}
// Handles standard macro calls (e.g., %name{arg}).
- // Parses the macro name and recursively parses its arguments enclosed in {} or [].
+ // Parses the macro name and recursively parses its arguments enclosed in {}
+ // or [].
bool handleMacro()
{
- if (input32_[pos_] == '%')
+ if(input32_[pos_] == '%')
{
flushText();
pos_++; // skip %
size_t name_start = pos_;
- while (pos_ < input32_.length() && (una::codepoint::is_alphanumeric(input32_[pos_]) || input32_[pos_] == '_'))
+ while(pos_ < input32_.length() &&
+ (una::codepoint::is_alphanumeric(input32_[pos_]) ||
+ input32_[pos_] == '_'))
{
pos_++;
}
- std::u32string name32 = input32_.substr(name_start, pos_ - name_start);
+ std::u32string name32 =
+ input32_.substr(name_start, pos_ - name_start);
std::string name = una::utf32to8(name32);
- if (name.empty())
+ if(name.empty())
{
current_text_ += '%';
// Continue from loop
- return true;
+ return true;
}
Macro macro;
macro.name = name;
- while (pos_ < input32_.length())
+ while(pos_ < input32_.length())
{
char32_t open = input32_[pos_];
- if (open == '{' || open == '[')
+ if(open == '{' || open == '[')
{
char32_t close = (open == '{') ? '}' : ']';
pos_++;
std::u32string arg_content32;
int balance = 1;
- while (pos_ < input32_.length() && balance > 0)
+ while(pos_ < input32_.length() && balance > 0)
{
- if (input32_[pos_] == open)
+ if(input32_[pos_] == open)
{
balance++;
arg_content32 += input32_[pos_];
}
- else if (input32_[pos_] == close)
+ else if(input32_[pos_] == close)
{
balance--;
- if (balance > 0) arg_content32 += input32_[pos_];
+ if(balance > 0)
+ {
+ arg_content32 += input32_[pos_];
+ }
}
else
{
@@ -267,12 +322,15 @@ private:
}
Group group;
- auto sub_nodes = Parser::parse(una::utf32to8(arg_content32), prefix_markups_, delimited_markups_);
- for (auto& n : sub_nodes)
+ auto sub_nodes =
+ Parser::parse(una::utf32to8(arg_content32),
+ prefix_markups_, delimited_markups_);
+ for(auto& n : sub_nodes)
{
group.addChild(std::move(n));
}
- macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(group)));
}
else
{
@@ -288,11 +346,11 @@ private:
// Handles inline code blocks enclosed in backticks (`code`).
bool handleCode()
{
- if (input32_[pos_] == '`')
+ if(input32_[pos_] == '`')
{
size_t start = pos_ + 1;
size_t end = input32_.find('`', start);
- if (end != std::u32string::npos)
+ if(end != std::u32string::npos)
{
flushText();
std::u32string content32 = input32_.substr(start, end - start);
@@ -301,9 +359,11 @@ private:
macro.name = "code";
Group group;
- group.addChild(std::make_unique<Node>(Text{una::utf32to8(content32)}));
+ group.addChild(
+ std::make_unique<Node>(Text{una::utf32to8(content32)}));
- macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(group)));
nodes_.push_back(std::make_unique<Node>(std::move(macro)));
pos_ = end + 1;
return true;
@@ -315,11 +375,17 @@ private:
// Handles Markdown images (![label](url)).
bool handleImage()
{
- if (pos_ + 1 >= input32_.length() || input32_[pos_] != '!' || input32_[pos_ + 1] != '[')
+ if(pos_ + 1 >= input32_.length() || input32_[pos_] != '!' ||
+ input32_[pos_ + 1] != '[')
+ {
return false;
+ }
auto res = findLabelAndUrl(pos_ + 2);
- if (!res) return false;
+ if(!res)
+ {
+ return false;
+ }
flushText();
addLinkLikeMacro("img", res->url, res->label);
@@ -331,11 +397,16 @@ private:
// Recursively parses the link text.
bool handleLink()
{
- if (input32_[pos_] != '[')
+ if(input32_[pos_] != '[')
+ {
return false;
+ }
auto res = findLabelAndUrl(pos_ + 1);
- if (!res) return false;
+ if(!res)
+ {
+ return false;
+ }
flushText();
addLinkLikeMacro("link", res->url, res->label);
@@ -354,33 +425,48 @@ private:
{
size_t j = label_start;
int bracket_bal = 1;
- while (j < input32_.length() && bracket_bal > 0)
+ while(j < input32_.length() && bracket_bal > 0)
{
- if (input32_[j] == '[') bracket_bal++;
- else if (input32_[j] == ']') bracket_bal--;
- if (bracket_bal > 0) j++;
+ if(input32_[j] == '[')
+ {
+ bracket_bal++;
+ }
+ else if(input32_[j] == ']')
+ {
+ bracket_bal--;
+ }
+ if(bracket_bal > 0)
+ {
+ j++;
+ }
}
- if (j >= input32_.length() || bracket_bal != 0)
+ if(j >= input32_.length() || bracket_bal != 0)
+ {
return std::nullopt;
+ }
size_t close_bracket = j;
- if (close_bracket + 1 >= input32_.length() || input32_[close_bracket + 1] != '(')
+ if(close_bracket + 1 >= input32_.length() ||
+ input32_[close_bracket + 1] != '(')
+ {
return std::nullopt;
+ }
size_t url_start = close_bracket + 2;
size_t url_end = input32_.find(')', url_start);
- if (url_end == std::u32string::npos)
+ if(url_end == std::u32string::npos)
+ {
return std::nullopt;
+ }
return LinkResult{
input32_.substr(label_start, close_bracket - label_start),
- input32_.substr(url_start, url_end - url_start),
- url_end + 1
- };
+ input32_.substr(url_start, url_end - url_start), url_end + 1};
}
- void addLinkLikeMacro(const std::string& name, const std::u32string& url, const std::u32string& label)
+ void addLinkLikeMacro(const std::string& name, const std::u32string& url,
+ const std::u32string& label)
{
Macro macro;
macro.name = name;
@@ -392,8 +478,12 @@ private:
// Arg 2: Label (parsed)
Group group2;
- auto sub = Parser::parse(una::utf32to8(label), prefix_markups_, delimited_markups_);
- for (auto& n : sub) group2.addChild(std::move(n));
+ auto sub = Parser::parse(una::utf32to8(label), prefix_markups_,
+ delimited_markups_);
+ for(auto& n : sub)
+ {
+ group2.addChild(std::move(n));
+ }
macro.arguments.push_back(std::make_unique<Node>(std::move(group2)));
nodes_.push_back(std::make_unique<Node>(std::move(macro)));
@@ -403,26 +493,33 @@ private:
// Recursively parses the content.
bool handleEmphasis()
{
- if (input32_[pos_] == '*')
+ if(input32_[pos_] == '*')
{
- bool strong = (pos_ + 1 < input32_.length() && input32_[pos_ + 1] == '*');
+ bool strong =
+ (pos_ + 1 < input32_.length() && input32_[pos_ + 1] == '*');
size_t start_content = pos_ + (strong ? 2 : 1);
std::u32string delim = strong ? U"**" : U"*";
size_t end = input32_.find(delim, start_content);
- if (end != std::u32string::npos)
+ if(end != std::u32string::npos)
{
flushText();
- std::u32string content32 = input32_.substr(start_content, end - start_content);
+ std::u32string content32 =
+ input32_.substr(start_content, end - start_content);
Macro macro;
macro.name = strong ? "strong" : "em";
Group group;
- auto sub = Parser::parse(una::utf32to8(content32), prefix_markups_, delimited_markups_);
- for (auto& n : sub) group.addChild(std::move(n));
- macro.arguments.push_back(std::make_unique<Node>(std::move(group)));
+ auto sub = Parser::parse(una::utf32to8(content32),
+ prefix_markups_, delimited_markups_);
+ for(auto& n : sub)
+ {
+ group.addChild(std::move(n));
+ }
+ macro.arguments.push_back(
+ std::make_unique<Node>(std::move(group)));
nodes_.push_back(std::make_unique<Node>(std::move(macro)));
pos_ = end + delim.length();
@@ -435,10 +532,10 @@ private:
} // namespace
-std::vector<std::unique_ptr<Node>> Parser::parse(
- const std::string& input,
- const std::vector<PrefixMarkup>& prefix_markups,
- const std::vector<DelimitedMarkup>& delimited_markups)
+std::vector<std::unique_ptr<Node>>
+Parser::parse(const std::string& input,
+ const std::vector<PrefixMarkup>& prefix_markups,
+ const std::vector<DelimitedMarkup>& delimited_markups)
{
InlineParser parser(input, prefix_markups, delimited_markups);
return parser.parse();
diff --git a/src/standard_library.cpp b/src/standard_library.cpp
index 1760af4..37f9ef9 100644
--- a/src/standard_library.cpp
+++ b/src/standard_library.cpp
@@ -6,19 +6,25 @@ namespace macrodown
void StandardLibrary::registerMacros(Evaluator& evaluator)
{
// Blocks
- evaluator.defineIntrinsic("p", [](const std::vector<std::string>& args) -> std::string
- {
- if(args.empty()) return "";
- std::string content = args[0];
- // Check if content is empty or just whitespace
- if(content.find_first_not_of(" \t\n\r") == std::string::npos)
+ evaluator.defineIntrinsic(
+ "p",
+ [](const std::vector<std::string>& args) -> std::string
{
- return "";
- }
- return "<p>" + content + "</p>\n";
- });
+ if(args.empty())
+ {
+ return "";
+ }
+ std::string content = args[0];
+ // Check if content is empty or just whitespace
+ if(content.find_first_not_of(" \t\n\r") == std::string::npos)
+ {
+ return "";
+ }
+ return "<p>" + content + "</p>\n";
+ });
- evaluator.define("quote", {"content"}, "<blockquote>\n%content</blockquote>\n");
+ evaluator.define("quote", {"content"},
+ "<blockquote>\n%content</blockquote>\n");
// Headings
evaluator.define("h1", {"content"}, "<h1>%content</h1>\n");
@@ -28,24 +34,31 @@ void StandardLibrary::registerMacros(Evaluator& evaluator)
evaluator.define("h5", {"content"}, "<h5>%content</h5>\n");
evaluator.define("h6", {"content"}, "<h6>%content</h6>\n");
- evaluator.defineIntrinsic("fenced_code", [](const std::vector<std::string>& args) -> std::string
- {
- if(args.size() < 2) return "";
- std::string info = args[0];
- std::string content = args[1];
- if(!info.empty())
+ evaluator.defineIntrinsic(
+ "fenced_code",
+ [](const std::vector<std::string>& args) -> std::string
{
- return "<pre><code class=\"language-" + info + "\">" + content + "\n</code></pre>\n";
- }
- return "<pre><code>" + content + "\n</code></pre>\n";
- });
+ if(args.size() < 2)
+ {
+ return "";
+ }
+ std::string info = args[0];
+ std::string content = args[1];
+ if(!info.empty())
+ {
+ return "<pre><code class=\"language-" + info + "\">" + content +
+ "\n</code></pre>\n";
+ }
+ return "<pre><code>" + content + "\n</code></pre>\n";
+ });
// Inline
evaluator.define("em", {"content"}, "<em>%content</em>");
evaluator.define("strong", {"content"}, "<strong>%content</strong>");
evaluator.define("code", {"content"}, "<code>%content</code>");
evaluator.define("link", {"url", "text"}, "<a href=\"%url\">%text</a>");
- evaluator.define("img", {"url", "alt"}, "<img src=\"%url\" alt=\"%alt\" />");
+ evaluator.define("img", {"url", "alt"},
+ "<img src=\"%url\" alt=\"%alt\" />");
}
} // namespace macrodown