From aad0ec24e886a93c5f85cbb8eb4c31ff5e973e1a Mon Sep 17 00:00:00 2001 From: Rigel Kent Date: Fri, 3 Aug 2018 02:02:01 +0200 Subject: [PATCH] advertising PeerTube's rather simple DNT policy --- package.json | 2 + server.ts | 18 +- server/controllers/static.ts | 19 +- server/initializers/constants.ts | 1 + server/middlewares/dnt.ts | 13 ++ server/static/dnt-policy/dnt-policy-1.0.txt | 218 ++++++++++++++++++++ yarn.lock | 21 +- 7 files changed, 287 insertions(+), 5 deletions(-) create mode 100644 server/middlewares/dnt.ts create mode 100644 server/static/dnt-policy/dnt-policy-1.0.txt diff --git a/package.json b/package.json index c7163d8bb..b5d695344 100644 --- a/package.json +++ b/package.json @@ -100,6 +100,7 @@ "flat": "^4.1.0", "fluent-ffmpeg": "^2.1.0", "helmet": "^3.12.1", + "ip-anonymize": "^0.0.6", "ipaddr.js": "https://github.com/whitequark/ipaddr.js.git#8e69afeb4053ee32447a101845f860848280eca5", "is-cidr": "^2.0.5", "iso-639-3": "^1.0.1", @@ -129,6 +130,7 @@ "sequelize-typescript": "0.6.6-beta.1", "sharp": "^0.20.0", "srt-to-vtt": "^1.1.2", + "useragent": "^2.3.0", "uuid": "^3.1.0", "validator": "^10.2.0", "webfinger.js": "^2.6.6", diff --git a/server.ts b/server.ts index 9094ac943..eccd19919 100644 --- a/server.ts +++ b/server.ts @@ -16,6 +16,8 @@ import * as morgan from 'morgan' import * as cors from 'cors' import * as cookieParser from 'cookie-parser' import * as helmet from 'helmet' +import * as useragent from 'useragent' +import * as anonymise from 'ip-anonymize' process.title = 'peertube' @@ -27,7 +29,7 @@ import { checkMissedConfig, checkFFmpeg, checkConfig, checkActivityPubUrls } fro // Do not use barrels because we don't want to load all modules here (we need to initialize database first) import { logger } from './server/helpers/logger' -import { API_VERSION, CONFIG, STATIC_PATHS, CACHE, REMOTE_SCHEME } from './server/initializers/constants' +import { API_VERSION, CONFIG, CACHE } from './server/initializers/constants' const missed = checkMissedConfig() if (missed.length !== 0) { @@ -85,6 +87,7 @@ import { trackerRouter, createWebsocketServer } from './server/controllers' +import { advertiseDoNotTrack } from './server/middlewares/dnt' import { Redis } from './server/lib/redis' import { BadActorFollowScheduler } from './server/lib/schedulers/bad-actor-follow-scheduler' import { RemoveOldJobsScheduler } from './server/lib/schedulers/remove-old-jobs-scheduler' @@ -103,8 +106,17 @@ if (isTestInstance()) { credentials: true })) } - // For the logger +morgan.token('remote-addr', req => { + return (req.get('DNT') === '1') ? + anonymise(req.ip || (req.connection && req.connection.remoteAddress) || undefined, + 16, // bitmask for IPv4 + 16 // bitmask for IPv6 + ) : + req.ip +}) +morgan.token('user-agent', req => (req.get('DNT') === '1') ? + useragent.parse(req.get('user-agent')).family : req.get('user-agent')) app.use(morgan('combined', { stream: { write: logger.info.bind(logger) } })) @@ -116,6 +128,8 @@ app.use(bodyParser.json({ })) // Cookies app.use(cookieParser()) +// W3C DNT Tracking Status +app.use(advertiseDoNotTrack) // ----------- Views, routes and static files ----------- diff --git a/server/controllers/static.ts b/server/controllers/static.ts index 3ccf624a7..2a92810f3 100644 --- a/server/controllers/static.ts +++ b/server/controllers/static.ts @@ -1,4 +1,5 @@ import * as cors from 'cors' +import { createReadStream } from 'fs' import * as express from 'express' import { CONFIG, STATIC_DOWNLOAD_PATHS, STATIC_MAX_AGE, STATIC_PATHS, ROUTE_CACHE_LIFETIME } from '../initializers' import { VideosPreviewCache } from '../lib/cache' @@ -93,10 +94,26 @@ staticRouter.use('/.well-known/nodeinfo', } ) staticRouter.use('/nodeinfo/:version.json', - // asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.NODEINFO)), + asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.NODEINFO)), asyncMiddleware(generateNodeinfo) ) +// dnt-policy.txt service (see https://www.eff.org/dnt-policy) +staticRouter.use('/.well-known/dnt-policy.txt', + asyncMiddleware(cacheRoute(ROUTE_CACHE_LIFETIME.DNT_POLICY)), + (_, res: express.Response) => { + res.type('text/plain') + createReadStream('./server/static/dnt-policy/dnt-policy-1.0.txt').pipe(res) + } +) + +// dnt service (see https://www.w3.org/TR/tracking-dnt/#status-resource) +staticRouter.use('/.well-known/dnt/', + (_, res: express.Response) => { + res.json({ tracking: 'N' }) + } +) + // --------------------------------------------------------------------------- export { diff --git a/server/initializers/constants.ts b/server/initializers/constants.ts index 069d9b2e8..74fe7965d 100644 --- a/server/initializers/constants.ts +++ b/server/initializers/constants.ts @@ -55,6 +55,7 @@ const ROUTE_CACHE_LIFETIME = { FEEDS: '15 minutes', ROBOTS: '2 hours', NODEINFO: '10 minutes', + DNT_POLICY: '1 week', ACTIVITY_PUB: { VIDEOS: '1 second' // 1 second, cache concurrent requests after a broadcast for example } diff --git a/server/middlewares/dnt.ts b/server/middlewares/dnt.ts new file mode 100644 index 000000000..cabad39c6 --- /dev/null +++ b/server/middlewares/dnt.ts @@ -0,0 +1,13 @@ +import * as ipaddr from 'ipaddr.js' +import { format } from 'util' + +const advertiseDoNotTrack = (_, res, next) => { + res.setHeader('Tk', 'N') + return next() +} + +// --------------------------------------------------------------------------- + +export { + advertiseDoNotTrack + } diff --git a/server/static/dnt-policy/dnt-policy-1.0.txt b/server/static/dnt-policy/dnt-policy-1.0.txt new file mode 100644 index 000000000..ad946d1f8 --- /dev/null +++ b/server/static/dnt-policy/dnt-policy-1.0.txt @@ -0,0 +1,218 @@ +Do Not Track Compliance Policy + +Version 1.0 + +This domain complies with user opt-outs from tracking via the "Do Not Track" +or "DNT" header [http://www.w3.org/TR/tracking-dnt/]. This file will always +be posted via HTTPS at https://example-domain.com/.well-known/dnt-policy.txt +to indicate this fact. + +SCOPE + +This policy document allows an operator of a Fully Qualified Domain Name +("domain") to declare that it respects Do Not Track as a meaningful privacy +opt-out of tracking, so that privacy-protecting software can better determine +whether to block or anonymize communications with this domain. This policy is +intended first and foremost to be posted on domains that publish ads, widgets, +images, scripts and other third-party embedded hypertext (for instance on +widgets.example.com), but it can be posted on any domain, including those users +visit directly (such as www.example.com). The policy may be applied to some +domains used by a company, site, or service, and not to others. Do Not Track +may be sent by any client that uses the HTTP protocol, including websites, +mobile apps, and smart devices like TVs. Do Not Track also works with all +protocols able to read HTTP headers, including SPDY. + +NOTE: This policy contains both Requirements and Exceptions. Where possible +terms are defined in the text, but a few additional definitions are included +at the end. + +REQUIREMENTS + +When this domain receives Web requests from a user who enables DNT by actively +choosing an opt-out setting in their browser or by installing software that is +primarily designed to protect privacy ("DNT User"), we will take the following +measures with respect to those users' data, subject to the Exceptions, also +listed below: + +1. END USER IDENTIFIERS: + + a. If a DNT User has logged in to our service, all user identifiers, such as + unique or nearly unique cookies, "supercookies" and fingerprints are + discarded as soon as the HTTP(S) response is issued. + + Data structures which associate user identifiers with accounts may be + employed to recognize logged in users per Exception 4 below, but may not + be associated with records of the user's activities unless otherwise + excepted. + + b. If a DNT User is not logged in to our service, we will take steps to ensure + that no user identifiers are transmitted to us at all. + +2. LOG RETENTION: + + a. Logs with DNT Users' identifiers removed (but including IP addresses and + User Agent strings) may be retained for a period of 10 days or less, + unless an Exception (below) applies. This period of time balances privacy + concerns with the need to ensure that log processing systems have time to + operate; that operations engineers have time to monitor and fix technical + and performance problems; and that security and data aggregation systems + have time to operate. + + b. These logs will not be used for any other purposes. + +3. OTHER DOMAINS: + + a. If this domain transfers identifiable user data about DNT Users to + contractors, affiliates or other parties, or embeds from or posts data to + other domains, we will either: + + b. ensure that the operators of those domains abide by this policy overall + by posting it at /.well-known/dnt-policy.txt via HTTPS on the domains in + question, + + OR + + ensure that the recipient's policies and practices require the recipient + to respect the policy for our DNT Users' data. + + OR + + obtain a contractual commitment from the recipient to respect this policy + for our DNT Users' data. + + NOTE: if an “Other Domain” does not receive identifiable user information + from the domain because such information has been removed, because the + Other Domain does not log that information, or for some other reason, these + requirements do not apply. + + c. "Identifiable" means any records which are not Anonymized or otherwise + covered by the Exceptions below. + +4. PERIODIC REASSERTION OF COMPLIANCE: + + At least once every 12 months, we will take reasonable steps commensurate + with the size of our organization and the nature of our service to confirm + our ongoing compliance with this document, and we will publicly reassert our + compliance. + +5. USER NOTIFICATION: + + a. If we are required by law to retain or disclose user identifiers, we will + attempt to provide the users with notice (unless we are prohibited or it + would be futile) that a request for their information has been made in + order to give the users an opportunity to object to the retention or + disclosure. + + b. We will attempt to provide this notice by email, if the users have given + us an email address, and by postal mail if the users have provided a + postal address. + + c. If the users do not challenge the disclosure request, we may be legally + required to turn over their information. + + d. We may delay notice if we, in good faith, believe that an emergency + involving danger of death or serious physical injury to any person + requires disclosure without delay of information relating to the + emergency. + +EXCEPTIONS + +Data from DNT Users collected by this domain may be logged or retained only in +the following specific situations: + +1. CONSENT / "OPT BACK IN" + + a. DNT Users are opting out from tracking across the Web. It is possible + that for some feature or functionality, we will need to ask a DNT User to + "opt back in" to be tracked by us across the entire Web. + + b. If we do that, we will take reasonable steps to verify that the users who + select this option have genuinely intended to opt back in to tracking. + One way to do this is by performing scientifically reasonable user + studies with a representative sample of our users, but smaller + organizations can satisfy this requirement by other means. + + c. Where we believe that we have opt back in consent, our server will + send a tracking value status header "Tk: C" as described in section 6.2 + of the W3C Tracking Preference Expression draft: + + http://www.w3.org/TR/tracking-dnt/#tracking-status-value + +2. TRANSACTIONS + + If a DNT User actively and knowingly enters a transaction with our + services (for instance, clicking on a clearly-labeled advertisement, + posting content to a widget, or purchasing an item), we will retain + necessary data for as long as required to perform the transaction. This + may for example include keeping auditing information for clicks on + advertising links; keeping a copy of posted content and the name of the + posting user; keeping server-side session IDs to recognize logged in + users; or keeping a copy of the physical address to which a purchased + item will be shipped. By their nature, some transactions will require data + to be retained indefinitely. + +3. TECHNICAL AND SECURITY LOGGING: + + a. If, during the processing of the initial request (for unique identifiers) + or during the subsequent 10 days (for IP addresses and User Agent strings), + we obtain specific information that causes our employees or systems to + believe that a request is, or is likely to be, part of a security attack, + spam submission, or fraudulent transaction, then logs of those requests + are not subject to this policy. + + b. If we encounter technical problems with our site, then, in rare + circumstances, we may retain logs for longer than 10 days, if that is + necessary to diagnose and fix those problems, but this practice will not be + routinized and we will strive to delete such logs as soon as possible. + +4. AGGREGATION: + + a. We may retain and share anonymized datasets, such as aggregate records of + readership patterns; statistical models of user behavior; graphs of system + variables; data structures to count active users on monthly or yearly + bases; database tables mapping authentication cookies to logged in + accounts; non-unique data structures constructed within browsers for tasks + such as ad frequency capping or conversion tracking; or logs with truncated + and/or encrypted IP addresses and simplified User Agent strings. + + b. "Anonymized" means we have conducted risk mitigation to ensure + that the dataset, plus any additional information that is in our + possession or likely to be available to us, does not allow the + reconstruction of reading habits, online or offline activity of groups of + fewer than 5000 individuals or devices. + + c. If we generate anonymized datasets under this exception we will publicly + document our anonymization methods in sufficient detail to allow outside + experts to evaluate the effectiveness of those methods. + +5. ERRORS: + +From time to time, there may be errors by which user data is temporarily +logged or retained in violation of this policy. If such errors are +inadvertent, rare, and made in good faith, they do not constitute a breach +of this policy. We will delete such data as soon as practicable after we +become aware of any error and take steps to ensure that it is deleted by any +third-party who may have had access to the data. + +ADDITIONAL DEFINITIONS + +"Fully Qualified Domain Name" means a domain name that addresses a computer +connected to the Internet. For instance, example1.com; www.example1.com; +ads.example1.com; and widgets.example2.com are all distinct FQDNs. + +"Supercookie" means any technology other than an HTTP Cookie which can be used +by a server to associate identifiers with the clients that visit it. Examples +of supercookies include Flash LSO cookies, DOM storage, HTML5 storage, or +tricks to store information in caches or etags. + +"Risk mitigation" means an engineering process that evaluates the possibility +and likelihood of various adverse outcomes, considers the available methods of +making those adverse outcomes less likely, and deploys sufficient mitigations +to bring the probability and harm from adverse outcomes below an acceptable +threshold. + +"Reading habits" includes amongst other things lists of visited DNS names, if +those domains pertain to specific topics or activities, but records of visited +DNS names are not reading habits if those domain names serve content of a very +diverse and general nature, thereby revealing minimal information about the +opinions, interests or activities of the user. diff --git a/yarn.lock b/yarn.lock index ec261e025..206700a87 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3531,6 +3531,10 @@ ioredis@^3.1.4: redis-commands "^1.2.0" redis-parser "^2.4.0" +ip-anonymize@^0.0.6: + version "0.0.6" + resolved "https://registry.yarnpkg.com/ip-anonymize/-/ip-anonymize-0.0.6.tgz#d2c513e448e874e8cc380d03404691b94b018e68" + ip-regex@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9" @@ -4492,7 +4496,7 @@ lowercase-keys@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-1.0.1.tgz#6f9e30b47084d971a7c820ff15a6c5167b74c26f" -lru-cache@^4.0.1: +lru-cache@4.1.x, lru-cache@^4.0.1: version "4.1.3" resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-4.1.3.tgz#a1175cf3496dfc8436c156c334b4955992bce69c" dependencies: @@ -5256,7 +5260,7 @@ os-locale@^1.4.0: dependencies: lcid "^1.0.0" -os-tmpdir@^1.0.0, os-tmpdir@^1.0.1, os-tmpdir@~1.0.1: +os-tmpdir@^1.0.0, os-tmpdir@^1.0.1, os-tmpdir@~1.0.1, os-tmpdir@~1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274" @@ -7201,6 +7205,12 @@ tmp@0.0.31: dependencies: os-tmpdir "~1.0.1" +tmp@0.0.x: + version "0.0.33" + resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9" + dependencies: + os-tmpdir "~1.0.2" + to-array@0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/to-array/-/to-array-0.1.4.tgz#17e6c11f73dd4f3d74cda7a4ff3238e9ad9bf890" @@ -7564,6 +7574,13 @@ user-home@^2.0.0: dependencies: os-homedir "^1.0.0" +useragent@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/useragent/-/useragent-2.3.0.tgz#217f943ad540cb2128658ab23fc960f6a88c9972" + dependencies: + lru-cache "4.1.x" + tmp "0.0.x" + ut_metadata@^3.0.8: version "3.2.2" resolved "https://registry.yarnpkg.com/ut_metadata/-/ut_metadata-3.2.2.tgz#189baf4ec690111ec242d7dfd954fa17242d4d70" -- 2.41.0