From c68e2b2d223c57836e04e18105255cf0e10ae75b Mon Sep 17 00:00:00 2001 From: Chocobozzz Date: Fri, 4 Feb 2022 10:31:54 +0100 Subject: [PATCH] Fix plaintext markdown converter --- .../core/renderer/html-renderer.service.ts | 4 +- server/controllers/feeds.ts | 6 +- server/helpers/markdown.ts | 68 ++++++++++++++----- server/lib/client-html.ts | 8 +-- server/tests/helpers/index.ts | 1 + server/tests/helpers/markdown.ts | 34 ++++++++++ shared/core-utils/renderer/html.ts | 10 ++- 7 files changed, 104 insertions(+), 27 deletions(-) create mode 100644 server/tests/helpers/markdown.ts diff --git a/client/src/app/core/renderer/html-renderer.service.ts b/client/src/app/core/renderer/html-renderer.service.ts index 418d8603e..d158519f8 100644 --- a/client/src/app/core/renderer/html-renderer.service.ts +++ b/client/src/app/core/renderer/html-renderer.service.ts @@ -1,6 +1,6 @@ import { Injectable } from '@angular/core' +import { getCustomMarkupSanitizeOptions, getDefaultSanitizeOptions } from '@shared/core-utils/renderer/html' import { LinkifierService } from './linkifier.service' -import { getCustomMarkupSanitizeOptions, getSanitizeOptions } from '@shared/core-utils/renderer/html' @Injectable() export class HtmlRendererService { @@ -30,7 +30,7 @@ export class HtmlRendererService { const options = additionalAllowedTags.length !== 0 ? 
getCustomMarkupSanitizeOptions(additionalAllowedTags) - : getSanitizeOptions() + : getDefaultSanitizeOptions() return this.sanitizeHtml(html, options) } diff --git a/server/controllers/feeds.ts b/server/controllers/feeds.ts index 3c8680ca4..e6cdaf94b 100644 --- a/server/controllers/feeds.ts +++ b/server/controllers/feeds.ts @@ -1,6 +1,6 @@ import express from 'express' import Feed from 'pfeed' -import { mdToPlainText, toSafeHtml } from '@server/helpers/markdown' +import { mdToOneLinePlainText, toSafeHtml } from '@server/helpers/markdown' import { getServerActor } from '@server/models/application/application' import { getCategoryLabel } from '@server/models/video/formatter/video-format-utils' import { VideoInclude } from '@shared/models' @@ -236,7 +236,7 @@ function initFeed (parameters: { return new Feed({ title: name, - description: mdToPlainText(description), + description: mdToOneLinePlainText(description), // updated: TODO: somehowGetLatestUpdate, // optional, default = today id: webserverUrl, link: webserverUrl, @@ -299,7 +299,7 @@ function addVideosToFeed (feed, videos: VideoModel[]) { title: video.name, id: video.url, link: WEBSERVER.URL + video.getWatchStaticPath(), - description: mdToPlainText(video.getTruncatedDescription()), + description: mdToOneLinePlainText(video.getTruncatedDescription()), content: toSafeHtml(video.description), author: [ { diff --git a/server/helpers/markdown.ts b/server/helpers/markdown.ts index 0b8c2fabc..25685ec6d 100644 --- a/server/helpers/markdown.ts +++ b/server/helpers/markdown.ts @@ -1,14 +1,14 @@ -import { getSanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils' +import { getDefaultSanitizeOptions, getTextOnlySanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils' -const sanitizeOptions = getSanitizeOptions() +const defaultSanitizeOptions = getDefaultSanitizeOptions() +const textOnlySanitizeOptions = getTextOnlySanitizeOptions() const sanitizeHtml = require('sanitize-html') const markdownItEmoji = 
require('markdown-it-emoji/light') const MarkdownItClass = require('markdown-it') -const markdownIt = new MarkdownItClass('default', { linkify: true, breaks: true, html: true }) -markdownIt.enable(TEXT_WITH_HTML_RULES) -markdownIt.use(markdownItEmoji) +const markdownItWithHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: true }) +const markdownItWithoutHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: false }) const toSafeHtml = (text: string) => { if (!text) return '' @@ -17,29 +17,65 @@ const toSafeHtml = (text: string) => { const textWithLineFeed = text.replace(/<br.?\/?>/g, '\r\n') // Convert possible markdown (emojis, emphasis and lists) to html - const html = markdownIt.render(textWithLineFeed) + const html = markdownItWithHTML.enable(TEXT_WITH_HTML_RULES) + .use(markdownItEmoji) + .render(textWithLineFeed) // Convert to safe Html - return sanitizeHtml(html, sanitizeOptions) + return sanitizeHtml(html, defaultSanitizeOptions) } -const mdToPlainText = (text: string) => { +const mdToOneLinePlainText = (text: string) => { if (!text) return '' - // Convert possible markdown (emojis, emphasis and lists) to html - const html = markdownIt.render(text) + markdownItWithoutHTML.use(markdownItEmoji) + .use(plainTextPlugin) + .render(text) // Convert to safe Html - const safeHtml = sanitizeHtml(html, sanitizeOptions) - - return safeHtml.replace(/<[^>]+>/g, '') - .replace(/\n$/, '') - .replace(/\n/g, ', ') + return sanitizeHtml(markdownItWithoutHTML.plainText, textOnlySanitizeOptions) } // --------------------------------------------------------------------------- export { toSafeHtml, - mdToPlainText + mdToOneLinePlainText +} + +// --------------------------------------------------------------------------- + +// Thanks: https://github.com/wavesheep/markdown-it-plain-text +function plainTextPlugin (markdownIt: any) { + let lastSeparator = '' + + function plainTextRule (state: any) { + const text = scan(state.tokens) + + 
markdownIt.plainText = text.replace(/\s+/g, ' ') + } + + function scan (tokens: any[]) { + let text = '' + + for (const token of tokens) { + if (token.children !== null) { + text += scan(token.children) + continue + } + + if (token.type === 'list_item_close') { + lastSeparator = ', ' + } else if (/[a-zA-Z]+_close/.test(token.type)) { + lastSeparator = ' ' + } else if (token.content) { + text += lastSeparator + text += token.content + } + } + + return text + } + + markdownIt.core.ruler.push('plainText', plainTextRule) } diff --git a/server/lib/client-html.ts b/server/lib/client-html.ts index 74788af52..19354ab70 100644 --- a/server/lib/client-html.ts +++ b/server/lib/client-html.ts @@ -12,7 +12,7 @@ import { HttpStatusCode } from '../../shared/models/http/http-error-codes' import { VideoPlaylistPrivacy, VideoPrivacy } from '../../shared/models/videos' import { isTestInstance } from '../helpers/core-utils' import { logger } from '../helpers/logger' -import { mdToPlainText } from '../helpers/markdown' +import { mdToOneLinePlainText } from '../helpers/markdown' import { CONFIG } from '../initializers/config' import { ACCEPT_HEADERS, @@ -103,7 +103,7 @@ class ClientHtml { res.status(HttpStatusCode.NOT_FOUND_404) return html } - const description = mdToPlainText(video.description) + const description = mdToOneLinePlainText(video.description) let customHtml = ClientHtml.addTitleTag(html, video.name) customHtml = ClientHtml.addDescriptionTag(customHtml, description) @@ -164,7 +164,7 @@ class ClientHtml { return html } - const description = mdToPlainText(videoPlaylist.description) + const description = mdToOneLinePlainText(videoPlaylist.description) let customHtml = ClientHtml.addTitleTag(html, videoPlaylist.name) customHtml = ClientHtml.addDescriptionTag(customHtml, description) @@ -263,7 +263,7 @@ class ClientHtml { return ClientHtml.getIndexHTML(req, res) } - const description = mdToPlainText(entity.description) + const description = 
mdToOneLinePlainText(entity.description) let customHtml = ClientHtml.addTitleTag(html, entity.getDisplayName()) customHtml = ClientHtml.addDescriptionTag(customHtml, description) diff --git a/server/tests/helpers/index.ts b/server/tests/helpers/index.ts index 66db93c99..91d11e25d 100644 --- a/server/tests/helpers/index.ts +++ b/server/tests/helpers/index.ts @@ -1,4 +1,5 @@ import './image' import './core-utils' import './comment-model' +import './markdown' import './request' diff --git a/server/tests/helpers/markdown.ts b/server/tests/helpers/markdown.ts new file mode 100644 index 000000000..0488a1a05 --- /dev/null +++ b/server/tests/helpers/markdown.ts @@ -0,0 +1,34 @@ +/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */ + +import 'mocha' +import { mdToOneLinePlainText } from '@server/helpers/markdown' +import { expect } from 'chai' + +describe('Markdown helpers', function () { + + describe('Plain text', function () { + + it('Should convert a list to plain text', function () { + const result = mdToOneLinePlainText(`* list 1 +* list 2 +* list 3`) + + expect(result).to.equal('list 1, list 2, list 3') + }) + + it('Should convert a list with indentation to plain text', function () { + const result = mdToOneLinePlainText(`Hello: + * list 1 + * list 2 + * list 3`) + + expect(result).to.equal('Hello: list 1, list 2, list 3') + }) + + it('Should convert HTML to plain text', function () { + const result = mdToOneLinePlainText(`**Hello** <strong>coucou</strong>`) + + expect(result).to.equal('Hello coucou') + }) + }) +}) diff --git a/shared/core-utils/renderer/html.ts b/shared/core-utils/renderer/html.ts index c9757be85..502308979 100644 --- a/shared/core-utils/renderer/html.ts +++ b/shared/core-utils/renderer/html.ts @@ -1,4 +1,4 @@ -export function getSanitizeOptions () { +export function getDefaultSanitizeOptions () { return { allowedTags: [ 'a', 'p', 'span', 'br', 'strong', 'em', 'ul', 'ol', 'li' ], allowedSchemes: [ 'http', 'https' ], @@ -23,8 
+23,14 @@ export function getSanitizeOptions () { } } +export function getTextOnlySanitizeOptions () { + return { + allowedTags: [] as string[] + } +} + export function getCustomMarkupSanitizeOptions (additionalAllowedTags: string[] = []) { - const base = getSanitizeOptions() + const base = getDefaultSanitizeOptions() return { allowedTags: [ -- 2.41.0