Skip to content

Commit 287767d

Browse files
authored
Merge pull request #50 from imjohnbo/treewalker
Fix: target correct link when multiple matches are present
2 parents 2906188 + d3ec435 commit 287767d

File tree

2 files changed

+71
-50
lines changed

2 files changed

+71
-50
lines changed

src/paste-markdown-html.ts

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ export function uninstall(el: HTMLElement): void {
88
el.removeEventListener('paste', onPaste)
99
}
1010

11-
type MarkdownTransformer = (element: HTMLElement | HTMLAnchorElement, args: string[]) => string
12-
1311
function onPaste(event: ClipboardEvent) {
1412
const transfer = event.clipboardData
1513
// if there is no clipboard data, or
@@ -20,65 +18,78 @@ function onPaste(event: ClipboardEvent) {
2018
if (!(field instanceof HTMLTextAreaElement)) return
2119

2220
// Get the plaintext and html version of clipboard contents
23-
let text = transfer.getData('text/plain')
21+
let plaintext = transfer.getData('text/plain')
2422
const textHTML = transfer.getData('text/html')
2523
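// Replace non-breaking spaces (U+00A0, the character behind "&nbsp;") with regular spaces.
// NOTE(review): the `\uC2A0` escape used in the added replacement is the Hangul syllable U+C2A0,
// not the UTF-8 byte pair C2 A0 that encodes U+00A0 — confirm it is intended.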
26-
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ')
24+
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ').replace(/\uC2A0/g, ' ')
2725
if (!textHTML) return
2826

29-
text = text.trim()
30-
if (!text) return
27+
plaintext = plaintext.trim()
28+
if (!plaintext) return
3129

3230
// Generate DOM tree from HTML string
3331
const parser = new DOMParser()
3432
const doc = parser.parseFromString(textHTMLClean, 'text/html')
33+
const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT)
3534

36-
const a = doc.getElementsByTagName('a')
37-
const markdown = transform(a, text, linkify as MarkdownTransformer)
35+
const markdown = convertToMarkdown(plaintext, walker)
3836

3937
// If no changes made by transforming
40-
if (markdown === text) return
38+
if (markdown === plaintext) return
4139

4240
event.stopPropagation()
4341
event.preventDefault()
4442

4543
insertText(field, markdown)
4644
}
4745

48-
// Build a markdown string from a DOM tree and plaintext
49-
function transform(
50-
elements: HTMLCollectionOf<HTMLElement>,
51-
text: string,
52-
transformer: MarkdownTransformer,
53-
...args: string[]
54-
): string {
55-
const markdownParts = []
56-
for (const element of elements) {
57-
const textContent = element.textContent || ''
58-
const {part, index} = trimAfter(text, textContent)
59-
if (index >= 0) {
60-
markdownParts.push(part.replace(textContent, transformer(element, args)))
61-
text = text.slice(index)
46+
/**
 * Rewrites `plaintext` so that links found while walking the pasted HTML appear as
 * Markdown links.
 *
 * Elements are visited in walker order. Each element's text is searched for in the
 * output only after the end of the previous match, so when a label occurs more than
 * once, the occurrence that lines up with the link is the one that gets replaced.
 *
 * @param plaintext - Text to convert; the visible caller passes the trimmed `text/plain` clipboard data.
 * @param walker - Element walker; the visible caller roots it at the body of the parsed `text/html` data.
 * @returns The converted text, or `plaintext` untouched if the walk was cut off by the node limit.
 */
function convertToMarkdown(plaintext: string, walker: TreeWalker): string {
  // Cap on the number of visited elements (presumably a guard against very large pastes).
  const NODE_LIMIT = 10000

  let currentNode = walker.firstChild()
  let markdown = plaintext
  // Everything before this offset in `markdown` has already been matched and is never searched again.
  let markdownIgnoreBeforeIndex = 0
  let visitedCount = 0

  // Walk through the DOM tree
  while (currentNode && visitedCount < NODE_LIMIT) {
    visitedCount++
    const link = isLink(currentNode) ? currentNode : null
    // Links contribute their whole label; other elements only their leading text node (if any).
    const text = link ? link.textContent ?? '' : (currentNode.firstChild as Text | null)?.wholeText ?? ''

    // No need to transform whitespace
    if (isEmptyString(text)) {
      currentNode = walker.nextNode()
      continue
    }

    // Find the index where "text" is found in "markdown" _after_ "markdownIgnoreBeforeIndex"
    const markdownFoundIndex = markdown.indexOf(text, markdownIgnoreBeforeIndex)

    if (markdownFoundIndex >= 0) {
      if (link) {
        const markdownLink = linkify(link)
        // Splice the Markdown link in place of the label:
        // 'see docs here' => 'see ' + '[docs](url)' + ' here'
        markdown =
          markdown.slice(0, markdownFoundIndex) + markdownLink + markdown.slice(markdownFoundIndex + text.length)
        markdownIgnoreBeforeIndex = markdownFoundIndex + markdownLink.length

        // The label has been consumed as a whole. Visiting the link's own descendants
        // (e.g. a <span> wrapping the label) would search for the same text again *after*
        // the link and could swallow a later, unrelated occurrence — so step over them.
        currentNode = nextNodeSkippingDescendants(walker)
        continue
      }
      markdownIgnoreBeforeIndex = markdownFoundIndex + text.length
    }

    currentNode = walker.nextNode()
  }

  // A node still pending here means the limit stopped the walk early; fall back to the
  // unmodified text rather than returning a partially converted result.
  return currentNode ? plaintext : markdown
}

// Moves the walker to the next node in document order that is not a descendant of its current node.
function nextNodeSkippingDescendants(walker: TreeWalker): Node | null {
  let next = walker.nextSibling()
  // No sibling at this depth: climb until an ancestor has one, or the walker's root is reached.
  while (!next && walker.parentNode()) {
    next = walker.nextSibling()
  }
  return next
}
7586

76-
index += search.length
87+
/**
 * Tells whether `text` carries no visible content.
 *
 * @param text - String to inspect.
 * @returns `true` when `text` is empty (or otherwise falsy at runtime) or contains only whitespace.
 */
function isEmptyString(text: string): boolean {
  // The falsy guard also protects callers that slip in null/undefined at runtime,
  // so no optional chaining is needed on the trim call.
  return !text || text.trim().length === 0
}
7790

78-
return {
79-
part: text.substring(0, index),
80-
index
81-
}
91+
/**
 * Type guard for HTML anchors that actually point somewhere.
 *
 * The namespace is checked in addition to the element name so that an `<a>` from
 * another namespace (for example inside inline SVG) is not narrowed to
 * `HTMLAnchorElement`, which it is not.
 *
 * @param node - Node to test.
 * @returns `true` when `node` is an HTML `<a>` element that has an `href` attribute.
 */
function isLink(node: Node): node is HTMLAnchorElement {
  const element = node as Element
  return (
    element.namespaceURI === 'http://www.w3.org/1999/xhtml' &&
    element.localName === 'a' &&
    element.hasAttribute('href')
  )
}
8394

8495
function hasHTML(transfer: DataTransfer): boolean {

test/test.js

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,10 @@ describe('paste-markdown', function () {
132132

133133
it('turns mixed html content containing several links into appropriate markdown', function () {
134134
// eslint-disable-next-line github/unescaped-html-literal
135-
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
135+
const sentence = `<meta charset='utf-8'>
136136
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
137-
<a href="https://github.com/"><span>link</span></a><span> and </span>
138-
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
137+
<a href="https://github.com/">link</a><span> and </span>
138+
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
139139
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
140140
<a href="https://github.com/"><span>end</span></a></b>`
141141
// eslint-disable-next-line i18n-text/no-en
@@ -186,19 +186,29 @@ describe('paste-markdown', function () {
186186

187187
it('leaves plaintext links alone', function () {
188188
// eslint-disable-next-line github/unescaped-html-literal
189-
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
189+
const sentence = `<meta charset='utf-8'>
190190
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
191-
<a href="https://github.com/"><span>https://github.com</span></a><span> and </span>
192-
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
193-
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
194-
<a href="https://github.com/"><span>https://github.com/</span></a></b>`
191+
<a href="https://github.com/">link</a><span> and </span>
192+
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
193+
<br /><a href="https://github.com/">Link</a><span> at the beginning, link at the </span>
194+
<a href="https://github.com/"><span>end</span></a></b>`
195195
/* eslint-disable i18n-text/no-en */
196-
const plaintextSentence =
197-
'This is a https://github.com and another link\n\nLink at the beginning, link at the https://github.com/'
196+
const plaintextSentence = 'This is a link and another link\n\nLink at the beginning, link at the end'
198197
/* eslint-enable i18n-text/no-en */
199198
const markdownSentence =
200-
'This is a https://github.com/ and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
201-
'[Link](https://github.com/) at the beginning, link at the https://github.com/'
199+
'This is a [link](https://github.com/) and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
200+
'[Link](https://github.com/) at the beginning, link at the [end](https://github.com/)'
201+
202+
paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
203+
assert.equal(textarea.value, markdownSentence)
204+
})
205+
206+
it('finds the right link when identical labels are present', function () {
207+
// eslint-disable-next-line github/unescaped-html-literal
208+
const sentence = `<meta charset='utf-8'><span>example<span> </span>
209+
</span><a href="https://example.com/">example</a>`
210+
const plaintextSentence = 'example example'
211+
const markdownSentence = 'example [example](https://example.com/)'
202212

203213
paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
204214
assert.equal(textarea.value, markdownSentence)

0 commit comments

Comments
 (0)