#include "block_parser.h"
#include <sstream>
#include <iostream>
#include <algorithm>
namespace macrodown
{
namespace
{
// Helper: true when every character of the line is whitespace as classified by
// std::isspace (so tabs count too); an empty line is considered blank.
bool is_blank(const std::string& line)
{
    for(const unsigned char ch : line)
    {
        if(!std::isspace(ch))
        {
            return false;
        }
    }
    return true;
}
// Helper: length of the run of ' ' characters that starts at `offset`.
// Only the space character is counted; an offset at or past the end yields 0.
size_t count_indent(const std::string& line, size_t offset)
{
    if(offset >= line.size())
    {
        return 0;
    }
    const size_t first_non_space = line.find_first_not_of(' ', offset);
    const size_t run_end = (first_non_space == std::string::npos) ? line.size() : first_non_space;
    return run_end - offset;
}
} // namespace
// Builds a parser whose tree consists of a single Document block and whose
// open-block stack holds just that root at index 0.
BlockParser::BlockParser()
    : root(std::make_unique<Block>(BlockType::Document))
{
    Block* const document = root.get();
    open_blocks.push_back({document});
}
// Parses `input` line by line with a fresh parser and returns ownership of the
// resulting Document block.
std::unique_ptr<Block> BlockParser::parse(const std::string& input)
{
    BlockParser parser;
    std::istringstream lines(input);
    for(std::string current; std::getline(lines, current);)
    {
        // getline only strips '\n'; drop a trailing '\r' left by Windows line endings
        const bool has_carriage_return = !current.empty() && current.back() == '\r';
        if(has_carriage_return)
        {
            current.pop_back();
        }
        parser.process_line(current);
    }
    // End of input: pop every block above index 0 (the root) off the open stack,
    // marking each one closed on the way out
    parser.close_unmatched_blocks(0);
    // The root itself stays on the stack, so its open flag is cleared by hand
    parser.root->open = false;
    // root is a member of a local object, so it must be moved out explicitly
    return std::move(parser.root);
}
// Reports whether `type` is one of the block types treated as a container:
// Document, Quote, List or ListItem. Every other type yields false.
bool BlockParser::is_container(BlockType type)
{
    switch(type)
    {
    case BlockType::Document:
    case BlockType::Quote:
    case BlockType::List:
    case BlockType::ListItem:
        return true;
    default:
        return false;
    }
}
// Decides whether an already-open block continues on `line`.
// `offset` is the position in `line` at which this block's content starts.
// When a Quote matches, `offset` is advanced past the leading spaces, the '>'
// marker and one optional following space; in every other case it is untouched.
// Block types other than Document, Quote and Paragraph never match.
bool BlockParser::matches(Block* block, const std::string& line, size_t& offset)
{
    const auto kind = block->type;

    // The document root continues on every line
    if(kind == BlockType::Document)
    {
        return true;
    }

    if(kind == BlockType::Quote)
    {
        // A quote continues only if a '>' appears after fewer than 4 spaces
        const size_t lead = count_indent(line, offset);
        const size_t marker_pos = offset + lead;
        const bool has_marker = lead < 4 && marker_pos < line.size() && line[marker_pos] == '>';
        if(!has_marker)
        {
            return false;
        }
        offset = marker_pos + 1; // Skip the indent and the '>'
        if(offset < line.size() && line[offset] == ' ')
        {
            ++offset; // One optional space after the marker
        }
        return true;
    }

    if(kind != BlockType::Paragraph)
    {
        return false;
    }

    // A paragraph ends on a blank line
    if(is_blank(line))
    {
        return false;
    }

    // With 4 or more leading spaces nothing can interrupt the paragraph
    const size_t lead = count_indent(line, offset);
    if(lead >= 4)
    {
        return true;
    }

    const size_t start = offset + lead;
    if(start >= line.size())
    {
        return true;
    }

    // A block quote marker interrupts the paragraph
    if(line[start] == '>')
    {
        return false;
    }

    // Anything that does not begin with '#' is plain continuation text
    if(line[start] != '#')
    {
        return true;
    }

    // ATX heading: a run of '#' (counted up to 6) followed by a space or end of line
    size_t hashes = 0;
    while(hashes < 6 && start + hashes < line.size() && line[start + hashes] == '#')
    {
        ++hashes;
    }
    const size_t after_hashes = start + hashes;
    const bool is_heading = after_hashes == line.size() || line[after_hashes] == ' ';
    return !is_heading;
}
void BlockParser::process_line(const std::string& line)
{
size_t offset = 0;
size_t matches_count = 0;
// 1. Find matches in open blocks
matches_count = 0; // Root always matches
for(size_t i = 1; i < open_blocks.size(); ++i)
{
if(matches(open_blocks[i].block, line, offset))
{
matches_count = i;
}
else
{
break;
}
}
// 2. Close unmatched blocks
close_unmatched_blocks(matches_count);
// 3. Open new blocks
// Scan rest of line (at offset)
// Check for BlockQuote
while(true)
{
size_t indent = count_indent(line, offset);
if(indent < 4 && offset + indent < line.size() && line[offset + indent] == '>')
{
offset += indent + 1;
if(offset < line.size() && line[offset] == ' ') offset++;
auto new_block = std::make_unique<Block>(BlockType::Quote);
Block* ptr = new_block.get();
open_blocks.back().block->children.push_back(std::move(new_block));
open_blocks.push_back({ptr});
}
else
{
break;
}
}
// 4. Handle Leaf Blocks (Heading, ThematicBreak) or continuation
Block* tip = open_blocks.back().block;
// If tip is a Paragraph, check for blank line (closes it)
if(tip->type == BlockType::Paragraph)
{
if(is_blank(line))
{
close_unmatched_blocks(open_blocks.size() - 2); // Close paragraph
return;
}
// Else, it's a continuation
// (Unless it's interrupted by a Heading/Quote etc. - Simplified: we assume it continues)
// Strictly, we should check if the line *starts* a new block (like Header)
// If it does, we close the paragraph.
}
// Check for ATX Heading
size_t indent = count_indent(line, offset);
if(indent < 4)
{
size_t check_pos = offset + indent;
size_t hash_count = 0;
while(check_pos + hash_count < line.size() && line[check_pos + hash_count] == '#' && hash_count < 6)
{
hash_count++;
}
if(hash_count > 0 && (check_pos + hash_count == line.size() || line[check_pos + hash_count] == ' '))
{
// Found Heading
// If we were in a paragraph, close it
if(tip->type == BlockType::Paragraph)
{
close_unmatched_blocks(open_blocks.size() - 2);
tip = open_blocks.back().block;
}
auto heading = std::make_unique<Block>(BlockType::Heading);
heading->level = hash_count;
// Content is the rest of the line (trimmed)
size_t content_start = check_pos + hash_count;
while(content_start < line.size() && line[content_start] == ' ') content_start++;
heading->literal_content = line.substr(content_start);
// Remove trailing hashes? CommonMark says yes. Optional for now.
heading->open = false; // Headings are single line
tip->children.push_back(std::move(heading));
return;
}
}
// 5. Finalize: Text or Paragraph
if(is_blank(line))
{
return; // Ignore blank lines if not ending a paragraph
}
if(tip->type == BlockType::Document || tip->type == BlockType::Quote || tip->type == BlockType::List || tip->type == BlockType::ListItem)
{
// Create new Paragraph
auto p = std::make_unique<Block>(BlockType::Paragraph);
Block* p_ptr = p.get();
tip->children.push_back(std::move(p));
open_blocks.push_back({p_ptr});
// Add text
// Note: indentation in paragraph text is preserved but leading spaces of the first line?
// CommonMark: stripped.
size_t content_start = offset + count_indent(line, offset);
p_ptr->literal_content = line.substr(content_start);
}
else if(tip->type == BlockType::Paragraph)
{
// Continuation
// Remove leading spaces up to indent? simplified: just add space + text
size_t content_start = offset + count_indent(line, offset);
tip->literal_content += "\n" + line.substr(content_start);
}
}
// Pops open blocks off the top of the stack until only indices
// 0..last_matched_index remain, clearing the `open` flag of each popped block.
void BlockParser::close_unmatched_blocks(size_t last_matched_index)
{
    const size_t blocks_to_keep = last_matched_index + 1;
    for(; open_blocks.size() > blocks_to_keep; open_blocks.pop_back())
    {
        open_blocks.back().block->open = false;
    }
}
} // namespace macrodown