From 2b4f391559586af734d5d5cb5886aa78c4f9557d Mon Sep 17 00:00:00 2001 From: nodiscc Date: Thu, 30 Aug 2018 19:36:22 +0200 Subject: add "noindex, nofollow" HTML robots meta-tag to documentation pages - Customize the "readthedocs" mkdocs theme: https://www.mkdocs.org/user-guide/styling-your-docs/#customizing-a-theme - Adds a '<meta name="robots" content="noindex, nofollow">' HTML tag on each page - Do not include robots directive on readthedocs.org, only in local builds --- doc/custom_theme/main.html | 25 +++++++++++++++++++++++++ mkdocs.yml | 4 +++- 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 doc/custom_theme/main.html diff --git a/doc/custom_theme/main.html b/doc/custom_theme/main.html new file mode 100644 index 00000000..69b398f6 --- /dev/null +++ b/doc/custom_theme/main.html @@ -0,0 +1,25 @@ +{% extends "base.html" %} + +{# +The entry point for the ReadTheDocs Theme. + +Any theme customisations should override this file to redefine blocks defined in +the various templates. The custom theme should only need to define a main.html +which `{% extends "base.html" %}` and defines various blocks which will replace +the blocks defined in base.html and its included child templates.
+#} + +{%- block site_meta %} +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1.0"> + +{%- if 'media.readthedocs.org' not in config.extra_css[0] %} +<meta name="robots" content="noindex, nofollow"> +{%- endif %} + +{% if page and page.is_homepage %}<meta name="description" content="{{ config['site_description'] }}">{% endif %} +{% if config.site_author %}<meta name="author" content="{{ config.site_author }}">{% endif %} +{% if config.site_favicon %}<link rel="shortcut icon" href="{{ config.site_favicon|url }}"> +{% else %}<link rel="shortcut icon" href="{{ 'img/favicon.ico'|url }}">{% endif %} +{%- endblock %} diff --git a/mkdocs.yml b/mkdocs.yml index 941fce3a..248fdbfe 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,7 +2,9 @@ site_name: Shaarli Documentation repo_url: https://github.com/shaarli/Shaarli edit_uri: edit/master/doc/md site_description: The personal, minimalist, super-fast, database free, bookmarking service -theme: readthedocs +theme: + name: readthedocs + custom_dir: doc/custom_theme/ docs_dir: doc/md site_dir: doc/html # Disable strict mode until ReadTheDocs provides up-to-date MkDocs settings: -- cgit v1.2.3 From 6c44d604a1ee0360a2eaf24b9cac18ca95edcb5f Mon Sep 17 00:00:00 2001 From: nodiscc Date: Thu, 30 Aug 2018 20:03:00 +0200 Subject: doc: server config: basic usage of robots.txt/HTML robots meta-tag/crawler control mechanisms --- doc/md/Server-configuration.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/md/Server-configuration.md b/doc/md/Server-configuration.md index e281dc85..cf44ecf5 100644 --- a/doc/md/Server-configuration.md +++ b/doc/md/Server-configuration.md @@ -397,6 +397,7 @@ http { ``` ## Proxies + If Shaarli is served behind a proxy (i.e. there is a proxy server between clients and the web server hosting Shaarli), please refer to the proxy server documentation for proper configuration. In particular, you have to ensure that the following server variables are properly set: - `X-Forwarded-Proto` @@ -405,6 +406,12 @@ If Shaarli is served behind a proxy (i.e. there is a proxy server between client See also [proxy-related](https://github.com/shaarli/Shaarli/issues?utf8=%E2%9C%93&q=label%3Aproxy+) issues. +## Robots and crawlers + +Shaarli disallows indexing and crawling of your local documentation pages by search engines, using `<meta name="robots" content="noindex, nofollow">` HTML tags.
+Your Shaarli instance and other pages you host may still be indexed by various robots on the public Internet. +You may want to set up a robots.txt file or other crawler control mechanism on your server. +See [[1]](https://en.wikipedia.org/wiki/Robots_exclusion_standard), [[2]](https://support.google.com/webmasters/answer/6062608?hl=en) and [[3]](https://developers.google.com/search/reference/robots_meta_tag). ## See also -- cgit v1.2.3