logseq2gemtext/src/ArticleParser.php

111 lines
3.2 KiB
PHP

<?php
declare(strict_types=1);
namespace LogseqGem;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
use League\CommonMark\Extension\CommonMark\Node\Block\ListBlock;
use League\CommonMark\Extension\CommonMark\Node\Block\ListItem;
/**
 * Converts a parsed Logseq article into gemtext and collects its page properties.
 *
 * The document tree is read from $this->document, which is not declared here
 * (presumably set up by the Parser base class — confirm against Parser).
 */
class ArticleParser extends Parser
{
    /**
     * Properties gathered from `key:: value` lines by the most recent convert()
     * call, or null when convert() has not run yet.
     *
     * The explicit null default is required: a typed property without a default
     * is "uninitialized", and reading it raises an Error before the is_null()
     * guard in getProperties() gets a chance to throw its own exception.
     *
     * @var array<string, string>|null
     */
    private ?array $properties = null;

    /**
     * Walks the document and renders it as gemtext.
     *
     * Leading `key:: value` text nodes are stored as properties instead of being
     * rendered. Text inside nested lists becomes `* ` bullets, other text becomes
     * paragraphs, and links become `=> url label` lines.
     *
     * @return string The gemtext lines joined with "\n".
     *
     * @throws \Exception When a link's first child is not a plain Text node.
     */
    public function convert(): string
    {
        $gemtext = [];
        $this->properties = [];
        $inFrontmatter = true;
        $node = null;         // node of the current event
        $lastNode = null;     // node of the previous event, updated on every event
        $previousNode = null; // last node that produced output (Text or Link)
        $blockLevel = 0;      // current ListBlock nesting depth
        $listItem = 0;        // bullets emitted since a list was last closed

        $walker = $this->document->walker();
        while ($event = $walker->next()) {
            $entering = $event->isEntering();
            $lastNode = $node;
            $node = $event->getNode();

            // A list item that is left right after being entered has no
            // children: treat it as a blank line.
            if ($node instanceof ListItem && $lastNode === $node && !$entering) {
                $gemtext[] = '';
                continue;
            }

            if (!$entering) {
                // The only exit event of interest is the end of a list.
                if ($node instanceof ListBlock) {
                    $blockLevel -= 1;
                    $listItem = 0;
                }
                continue;
            }

            if ($node instanceof ListBlock) {
                $blockLevel += 1;
                // Properties are only recognised before the first list starts.
                $inFrontmatter = false;
                continue;
            }

            if ($node instanceof Text) {
                $text = $node->getLiteral();

                if ($inFrontmatter) {
                    if (preg_match('/^([A-Za-z-]+):: (.*)/', $text, $matches)) {
                        $this->properties[$matches[1]] = $matches[2];
                        continue;
                    }
                    // The first non-property text ends the frontmatter.
                    $inFrontmatter = false;
                }

                if ($blockLevel > 1) {
                    // Nested list content becomes a bullet; the first bullet of
                    // a run is preceded by an empty line.
                    $listItem += 1;
                    $leader = $listItem === 1 ? "\n" : "";
                    $gemtext[] = $leader . '* ' . $text;
                } else {
                    // Paragraph text: separated from earlier output by an empty line.
                    $leader = $previousNode ? "\n" : "";
                    $gemtext[] = $leader . $text;
                }

                $previousNode = $node;
                continue;
            }

            if ($node instanceof Link) {
                // Consume the link's first child here so the label is not also
                // rendered as standalone text by the Text branch above.
                $label = $walker->next()->getNode();
                if (!$label instanceof Text) {
                    throw new \Exception('Expected next node in Link to be Text, got ' . get_class($label));
                }

                // Consecutive links stay together; anything else gets an empty
                // line before the link.
                $leader = ($previousNode && !$previousNode instanceof Link) ? "\n" : "";
                $gemtext[] = $leader . sprintf("=> %s %s", $node->getUrl(), $label->getLiteral());
                $previousNode = $node;
                continue;
            }
        }

        return implode("\n", $gemtext);
    }

    /**
     * Returns the properties collected by the last convert() call.
     *
     * @return array<string, string> Property values keyed by property name.
     *
     * @throws \Exception When convert() has not been called yet.
     */
    public function getProperties(): array
    {
        if (is_null($this->properties)) {
            throw new \Exception("Cannot get properties before converting a document");
        }

        return $this->properties;
    }

    /**
     * Tells whether the converted article has its `status` property set to `published`.
     *
     * @throws \Exception When convert() has not been called yet.
     */
    public function isPublished(): bool
    {
        return (($this->getProperties()['status'] ?? '') === 'published');
    }
}