Skip to content

Commit

Permalink
Add basic markdown html sanitizer (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
Terisback committed Sep 29, 2023
1 parent 9e030cb commit e26ec53
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 2 deletions.
234 changes: 234 additions & 0 deletions src/lib/html/sanitize.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
module html

import maps
import net.html as net_html
import net.urllib

// allowed_tags lists the element names that survive sanitization.
// traverse_and_sanitize removes every tag whose name is not in this list; the
// only exception is `<input type="checkbox">`, which is special-cased there.
// Scripting and embedding elements (`script`, `style`, `iframe`, ...) are
// not listed and are therefore dropped.
// NOTE(review): 'text' is presumably the name net.html assigns to text nodes,
// so it must stay in the list for plain text to be kept - confirm against net.html.
const allowed_tags = [
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'h7',
'h8',
'br',
'b',
'i',
'strong',
'em',
'a',
'pre',
'code',
'img',
'tt',
'div',
'ins',
'del',
'sup',
'sub',
'p',
'ol',
'ul',
'text',
'table',
'thead',
'tbody',
'tfoot',
'blockquote',
'dl',
'dt',
'dd',
'kbd',
'q',
'samp',
'var',
'hr',
'ruby',
'rt',
'rp',
'li',
'tr',
'td',
'th',
's',
'strike',
'summary',
'details',
]!

// allowed_attributes lists the attribute names that are kept on any allowed tag.
// Attributes that carry a URL (`href`, `src`, `longdesc`, `cite`) are not
// in this list: traverse_and_sanitize admits them only on specific tags and only
// after checking the URL scheme. `style` and the `on*` event handlers are
// not listed either, so they are always stripped.
const allowed_attributes = [
'abbr',
'accept',
'accept-charset',
'accesskey',
'action',
'align',
'alt',
'axis',
'border',
'class',
'cellpadding',
'cellspacing',
'char',
'charoff',
'charset',
'checked',
'clear',
'cols',
'colspan',
'color',
'compact',
'coords',
'datetime',
'dir',
'disabled',
'enctype',
'for',
'frame',
'headers',
'height',
'hreflang',
'hspace',
'ismap',
'label',
'lang',
'maxlength',
'media',
'method',
'multiple',
'name',
'nohref',
'noshade',
'nowrap',
'open',
'prompt',
'readonly',
'rel',
'rev',
'rows',
'rowspan',
'rules',
'scope',
'selected',
'shape',
'size',
'span',
'start',
'summary',
'tabindex',
'target',
'title',
'type',
'usemap',
'valign',
'value',
'vspace',
'width',
'itemprop',
]!

// sanitize parses `text` as HTML, removes every tag and attribute that is not
// explicitly allowed, and returns the remaining document serialized back to a
// string. An empty string is returned when the parser yields no root node.
pub fn sanitize(text string) string {
	dom := net_html.parse(text)

	mut root := dom.get_root()
	if root == unsafe { nil } {
		return ''
	}

	// Iterate by index: `&root.children[i]` is the address of the slot inside
	// the array, so a tag rejected by traverse_and_sanitize is really set to nil
	// in `root.children`. A `for t in root.children` loop would hand out the
	// address of a loop-local copy and rejected top-level tags would survive.
	for i := 0; i < root.children.len; i++ {
		traverse_and_sanitize(&root.children[i])
	}

	// Drop the slots that were nil-ed out above.
	unsafe {
		root.children = root.children.filter(it != nil)
	}
	return root.str()
}

// traverse_and_sanitize sanitizes the tag stored in the slot `tag` points to and
// then recurses into its children.
//
// A tag that is not allowed is removed by writing nil into the slot; the caller
// is expected to filter nil entries out of its children array afterwards.
// For an allowed tag, attributes are reduced to `allowed_attributes`, plus a few
// URL attributes that are kept only when their scheme is allowed for that tag.
fn traverse_and_sanitize(tag &&net_html.Tag) {
	if tag.name == 'br' {
		unsafe {
			tag.close_type = .in_name
		}
	}

	// Filter allowed tags. Checkbox inputs are the one exception to the list.
	is_checkbox := tag.name == 'input' && tag.attributes['type'] == 'checkbox'
	if tag.name !in html.allowed_tags && !is_checkbox {
		unsafe {
			*tag = nil
		}
		return
	}

	// Filter allowed attributes
	unsafe {
		if tag.name == 'a' {
			// Links: href is kept only for these schemes.
			tag.attributes = filter_url_attributes(tag.attributes, ['href'], ['http', 'https',
				'mailto', 'github-windows', 'github-mac', 'x-github-client'])
		} else if tag.name in ['blockquote', 'del', 'ins', 'q']! {
			// Quoting/editing tags: cite must be an http(s) URL.
			tag.attributes = filter_url_attributes(tag.attributes, ['cite'], ['http', 'https'])
		} else if tag.name == 'img' {
			// Images: src and longdesc must be http(s) URLs.
			tag.attributes = filter_url_attributes(tag.attributes, ['src', 'longdesc'],
				['http', 'https'])
		} else {
			tag.attributes = maps.filter(tag.attributes, fn (k string, v string) bool {
				return k in html.allowed_attributes
			})
		}
	}

	// Index-based loop so that each child receives the address of its own slot
	// in `tag.children` and can nil itself out.
	for i := 0; i < tag.children.len; i++ {
		traverse_and_sanitize(&tag.children[i])
	}

	// Drop the children that were rejected above.
	unsafe {
		tag.children = tag.children.filter(it != nil)
	}
}

// filter_url_attributes returns a new map with the entries of `attributes` that
// are safe to keep: a key listed in `url_keys` is kept only if its value parses
// as a URL whose scheme is in `schemes` (an unparsable value yields an empty
// scheme and is dropped); any other key is kept only if it is in `allowed_attributes`.
fn filter_url_attributes(attributes map[string]string, url_keys []string, schemes []string) map[string]string {
	mut kept := map[string]string{}
	for k, v in attributes {
		if k in url_keys {
			url := urllib.parse(v) or { urllib.URL{} }
			if url.scheme in schemes {
				kept[k] = v
			}
		} else if k in html.allowed_attributes {
			kept[k] = v
		}
	}
	return kept
}
3 changes: 2 additions & 1 deletion src/package.v
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module main
import vweb
import lib.log
import lib.storage
import lib.html
import markdown
import entity { Package }

Expand Down Expand Up @@ -71,7 +72,7 @@ pub fn (mut app App) package(name string) vweb.Result {
return app.redirect('/')
}

rendered := markdown.to_html(readme).bytes()
rendered := html.sanitize(markdown.to_html(readme)).bytes()

app.storage.save(readme_path, rendered) or {
println('failed to save readme to storage: ${err}')
Expand Down
3 changes: 2 additions & 1 deletion src/usecase/package/packages.v
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ pub fn (u UseCase) update_package_info(package_id int, name string, url string,
}
}

u.packages.update_package_info(package_id, usr.username + '.' + name.limit(package.max_name_len), repo_url, description)!
u.packages.update_package_info(package_id, usr.username + '.' + name.limit(package.max_name_len),
repo_url, description)!
}

pub fn check_vcs(url string, username string) !string {
Expand Down

0 comments on commit e26ec53

Please sign in to comment.