utils: implement html-to-text

Implement a crude HTML-to-text scraper function that extracts plain text
from HTML messages, so that it can be used for indexing.
This commit is contained in:
Dirk-Jan C. Binnema
2023-07-03 20:29:51 +03:00
parent 23ba61a650
commit 56b8fad89e
3 changed files with 624 additions and 0 deletions

View File

@ -17,6 +17,7 @@
lib_mu_utils=static_library('mu-utils', [
'mu-command-handler.cc',
'mu-html-to-text.cc',
'mu-lang-detector.cc',
'mu-logger.cc',
'mu-option.cc',
@ -43,6 +44,15 @@ lib_mu_utils_dep = declare_dependency(
include_directories(['.', '..', '../thirdparty'])
)
#
# tools
#
# Build the 'mu-html2text' executable from mu-html-to-text.cc, compiled with
# BUILD_HTML_TO_TEXT defined. The binary is not installed.
# NOTE(review): the define presumably enables a main() in that source file;
# confirm against mu-html-to-text.cc.
html2text = executable(
  'mu-html2text',
  'mu-html-to-text.cc',
  install: false,
  cpp_args: ['-DBUILD_HTML_TO_TEXT'],
  dependencies: [lib_mu_utils_dep, glib_dep]
)
#
# tests
#
@ -82,4 +92,11 @@ test('test-lang-detector',
cpp_args: ['-DBUILD_TESTS'],
dependencies: [ config_h_dep, glib_dep, lib_mu_utils_dep ]))
# Register the 'test-html-to-text' test: mu-html-to-text.cc is compiled with
# BUILD_TESTS defined into a non-installed executable that serves as the test.
test(
  'test-html-to-text',
  executable(
    'test-html-to-text',
    'mu-html-to-text.cc',
    cpp_args: ['-DBUILD_TESTS'],
    dependencies: [glib_dep, lib_mu_utils_dep],
    install: false
  )
)
subdir('tests')