Changes
diff --git a/README.md b/README.md
index e16d2c6..06f6952 100644
--- a/README.md
+++ b/README.md
@@ -105,7 +105,7 @@ Useful for tags or mentions. A prefix markup starts with a character and ends at
```cpp
// Transforms #test into %tag{test}
-md.definePrefixMarkup({"#", "tag"});
+md.definePrefixMarkup({"#", "tag", ""});
md.evaluator().define("tag", {"content"}, "<span class=\"tag\">#%content</span>");
```
diff --git a/design.md b/design.md
index 1e3b9f1..87dee36 100644
--- a/design.md
+++ b/design.md
@@ -34,10 +34,10 @@ We strictly follow the CommonMark "Appendix A" strategy:
* `*Bold*` $\rightarrow$ `%em{Bold}`
### 2.3 Custom Markups
-The system supports user-defined custom markups that map to macros.
-* **Prefix Markup**: Starts with a specific character (e.g., `#tag`) and ends at a whitespace or punctuation boundary (except `_`).
+The system supports user-defined custom markups that map to macros. The content of the markup is determined by a regular expression.
+* **Prefix Markup**: Starts with a specific character (e.g., `#tag`) and captures text matching a regex pattern. By default, it ends at a whitespace or punctuation boundary (except `_`, `-`, `@`, and `.`).
* Example: `#tag` $\rightarrow$ `%tag_macro{tag}`
-* **Delimited Markup**: Starts and ends with the same character (e.g., `:highlight:`) with no spaces inside and strict punctuation rules.
+* **Delimited Markup**: Starts and ends with the same character (e.g., `:highlight:`) and captures text matching a regex pattern. No whitespace is allowed inside.
* Example: `:highlight:` $\rightarrow$ `%highlight_macro{highlight}`
## 3. Data Structures
@@ -116,11 +116,13 @@ Users can define custom markups that are transformed into macros during the inli
struct PrefixMarkup {
std::string prefix; // The trigger character(s)
std::string macro_name; // Target macro to transform into
+ std::string pattern; // Regex pattern for the marked-up text
};
struct DelimitedMarkup {
std::string delimiter; // The character used for start and end
std::string macro_name; // Target macro to transform into
+ std::string pattern; // Regex pattern for the content between delimiters
};
```
diff --git a/include/markups.h b/include/markups.h
index d2e136c..e666166 100644
--- a/include/markups.h
+++ b/include/markups.h
@@ -10,12 +10,14 @@ struct PrefixMarkup
{
std::string prefix;
std::string macro_name;
+ std::string pattern;
};
struct DelimitedMarkup
{
std::string delimiter;
std::string macro_name;
+ std::string pattern;
};
} // namespace macrodown
diff --git a/prd.md b/prd.md
index 73d17ea..61d9a9a 100644
--- a/prd.md
+++ b/prd.md
@@ -69,7 +69,8 @@ The user will be able to define two kinds of markup:
as a prefix, then in `It’s a #test.`, `#test` is the part that’s
being marked up. In other words, for a prefix markup, the text
that’s marked up begin with the prefix, and ends with any
- whitespace or punctuation (excluding the underscroe `_`).
+ whitespace or punctuation (excluding the underscore `_`, dash `-`,
+ at-sign `@` and dot `.`).
2. A delimited markup. This kind of custom markup has starts with a
character and ends with the same delimiting character. No
whitespace or punctuation are allowed in between, except for the
diff --git a/src/parser.cpp b/src/parser.cpp
index 54a4b97..10f8ebc 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -2,6 +2,7 @@
#include "macrodown.h"
#include "uni_algo/all.h"
#include <iostream>
+#include <regex>
namespace macrodown
{
@@ -13,12 +14,14 @@ struct PrefixInfo
{
char32_t cp;
std::string macro;
+ std::regex pattern;
};
struct DelimInfo
{
char32_t cp;
std::string macro;
+ std::regex pattern;
};
// InlineParser handles the parsing of inline elements within a block of text.
@@ -39,13 +42,13 @@ public:
{
auto cp = una::utf8to32u(m.prefix);
if (!cp.empty())
- p_infos_.push_back({cp[0], m.macro_name});
+ p_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
}
for (const auto& m : delimited_markups_)
{
auto cp = una::utf8to32u(m.delimiter);
if (!cp.empty())
- d_infos_.push_back({cp[0], m.macro_name});
+ d_infos_.push_back({cp[0], m.macro_name, std::regex(m.pattern)});
}
}
@@ -120,10 +123,23 @@ private:
{
char32_t next_c = input32_[j];
if (una::codepoint::is_whitespace(next_c)) break;
- if (next_c != '_' && una::codepoint::prop{next_c}.General_Category_P()) break;
+ if (next_c != '_' && next_c != '-' && next_c != '@' && next_c != '.' && una::codepoint::prop{next_c}.General_Category_P()) break;
j++;
}
+ if (j > pos_ + 1)
+ {
+ // If the last character was a dot, and it is followed by whitespace or EOF,
+ // exclude it from the markup.
+ if (input32_[j - 1] == '.')
+ {
+ if (j == input32_.length() || una::codepoint::is_whitespace(input32_[j]))
+ {
+ j--;
+ }
+ }
+ }
+
if (j > pos_ + 1)
{
flushText();
diff --git a/tests/test_custom_markup.cpp b/tests/test_custom_markup.cpp
index a01669c..bcd7719 100644
--- a/tests/test_custom_markup.cpp
+++ b/tests/test_custom_markup.cpp
@@ -7,7 +7,7 @@ TEST(CustomMarkupTest, PrefixMarkup)
{
MacroDown md;
// Define # as prefix markup for 'tag'
- md.definePrefixMarkup({"#", "tag"});
+ md.definePrefixMarkup({"#", "tag", ""});
// We also need to define what the 'tag' macro does in HTML
md.evaluator().define("tag", {"content"}, "<span class=\"tag\">#%content</span>");
@@ -21,7 +21,7 @@ TEST(CustomMarkupTest, PrefixMarkup)
TEST(CustomMarkupTest, PrefixMarkupPunctuation)
{
MacroDown md;
- md.definePrefixMarkup({"#", "tag"});
+ md.definePrefixMarkup({"#", "tag", ""});
md.evaluator().define("tag", {"content"}, "[%content]");
// Punctuation should end the prefix markup, except for underscore
@@ -35,7 +35,7 @@ TEST(CustomMarkupTest, DelimitedMarkup)
{
MacroDown md;
// Define : as delimited markup for 'highlight'
- md.defineDelimitedMarkup({":", "highlight"});
+ md.defineDelimitedMarkup({":", "highlight", ""});
md.evaluator().define("highlight", {"content"}, "<mark>%content</mark>");
std::string input = "This is :important: text.";
@@ -47,7 +47,7 @@ TEST(CustomMarkupTest, DelimitedMarkup)
TEST(CustomMarkupTest, DelimitedMarkupInvalid)
{
MacroDown md;
- md.defineDelimitedMarkup({":", "highlight"});
+ md.defineDelimitedMarkup({":", "highlight", ""});
md.evaluator().define("highlight", {"content"}, "<mark>%content</mark>");
// Whitespace or punctuation (except _ and -) makes it invalid
@@ -61,7 +61,7 @@ TEST(CustomMarkupTest, DelimitedMarkupInvalid)
TEST(CustomMarkupTest, DelimitedMarkupUnderscoreDash)
{
MacroDown md;
- md.defineDelimitedMarkup({":", "highlight"});
+ md.defineDelimitedMarkup({":", "highlight", ""});
md.evaluator().define("highlight", {"content"}, "<mark>%content</mark>");
std::string input = "This is :valid_with-extra: text.";
@@ -74,7 +74,7 @@ TEST(CustomMarkupTest, UnicodeMarkup)
{
MacroDown md;
// Multi-byte prefix (e.g., symbol)
- md.definePrefixMarkup({"§", "section"});
+ md.definePrefixMarkup({"§", "section", ""});
md.evaluator().define("section", {"content"}, "Sec. %content");
std::string input = "See §A.1";
@@ -82,3 +82,43 @@ TEST(CustomMarkupTest, UnicodeMarkup)
EXPECT_EQ(html, "<p>See Sec. A.1</p>\n");
}
+
+TEST(CustomMarkupTest, PrefixMarkupExtendedCharacters)
+{
+ MacroDown md;
+ md.definePrefixMarkup({"#", "tag", ""});
+ md.evaluator().define("tag", {"content"}, "[%content]");
+
+ // Dash and @ should NOT end the prefix markup per updated PRD
+ std::string input = "#test-case #user@domain";
+ std::string html = md.render(*md.parse(input));
+
+ EXPECT_EQ(html, "<p>[test-case] [user@domain]</p>\n");
+}
+
+TEST(CustomMarkupTest, PrefixMarkupAtPrefix)
+{
+ MacroDown md;
+ md.definePrefixMarkup({"@", "mention", ""});
+ md.evaluator().define("mention", {"content"}, "<a href=\" /u/%content\">@%content</a>");
+
+ // Dash, @ and . allowed inside.
+ std::string input = "Hello @user-name! Email: @user@domain.com";
+ std::string html = md.render(*md.parse(input));
+
+ EXPECT_EQ(html, "<p>Hello <a href=\" /u/user-name\">@user-name</a>! Email: <a href=\" /u/user@domain.com\">@user@domain.com</a></p>\n");
+}
+
+TEST(CustomMarkupTest, PrefixMarkupTrailingDot)
+{
+ MacroDown md;
+ md.definePrefixMarkup({"@", "mention", ""});
+ md.evaluator().define("mention", {"content"}, "(%content)");
+
+ std::string input = "End of sentence @mention. Another @mention..";
+ std::string html = md.render(*md.parse(input));
+
+ // @mention. -> mention
+ // @mention.. -> mention. (first dot kept, second excluded)
+ EXPECT_EQ(html, "<p>End of sentence (mention). Another (mention.).</p>\n");
+}
\ No newline at end of file