Github Mirror / telegraf-plugins: df595a37




Add webmention_io plugin

Add webmention_io plugin

Commit df595a37.

Authored 2022-12-13T09:18:07.000+00:00 by B Tasker in project Github Mirror / telegraf-plugins

+223 lines -0 lines

Commit Signature

Changes

diff --git a/webmention_io/README.md b/webmention_io/README.md
--- a/webmention_io/README.md
+++ b/webmention_io/README.md
# @@ -0,0 +1,112 @@
# +# Webmention.io Telegraf exec plugin
# +
# +An `exec` plugin for Telegraf to poll the [WebMention.io](https://webmention.io) API and retrieve details of recent webmentions.
# +
# +By default, this will write into a measurement called `webmentions` - you can change this at the head of the script.
# +
# +Note that because this plugin is collecting mentions rather than aggregate stats, the resulting data can be quite high cardinality: you may want to downsample for long term storage.
# +
# +----
# +
# +### Tags
# +
# +* `type`: The webmention type (`in-reply-to`, `like-of`, `repost-of`, `bookmark-of`, `mention-of`, `rsvp`, `follow-of`)
# +* `url`: The URL that the mention references (i.e. the URL on your site)
# +* `author`: The author of the mention
# +
# +----
# +
# +### Fields
# +
# +* `id`: The webmention.io ID for this mention
# +* `author_url`: The URL of the author's profile (where available)
# +* `linked_from`: Where the mention was made
# +* `content`: When the mention is text based (like `in-reply-to`), the first 1000 chars of the comment/reply
# +
# +----
# +
# +### Dependencies
# +
# +You will need two python modules
# +```
# +pip3 install requests python-dateutil
# +```
# +
# +----
# +
# +### Configuration
# +
# +There are 3 variables at the top of the script
# +```python
# +# The measurement to write mentions into
# +MEASUREMENT = "webmentions"
# +
# +# How far back should we tell the API to search?
# +MINUTES = 60
# +
# +# A list of your API tokens, you'll have 1 per site that you've set up
# +# on webmention.io
# +TOKENS = [""]
# +```
# +
# +You'll need to log into `webmention.io` and grab your API token from the settings page. If you've got multiple domains configured with `webmention.io` you can provide multiple tokens:
# +```python
# +TOKENS = ["abcde", "fghijk"]
# +```
# +----
# +
# +### Setup
# +
# +Configure in telegraf as follows
# +
# +```
# +[[inputs.exec]]
# + commands = [
# + "/usr/local/src/telegraf_plugins/webmention_io.py",
# + ]
# + timeout = "60s"
# + interval = "15m"
# + name_suffix = ""
# + data_format = "influx"
# +```
# +
# +----
# +
# +### Routed Output
# +
# +Because the data is potentially quite high cardinality, you may want to write it into a separate short-lived database (to then downsample from).
# +
# +The plugin includes a tag `influxdb_database` so that you can achieve this by having multiple Telegraf outputs and using `tagpass` to control which metrics are written to each.
# +
# +```
# +# Main Output
# +[[outputs.influxdb_v2]]
# + urls = ["http://192.168.3.84:8086"]
# + bucket = "telegraf"
# + token = "abcdefg"
# + organization = "1ffffaaaa"
# +
# + [outputs.influxdb_v2.tagdrop]
# + influxdb_database = ["*"]
# +
# +# Webmentions output
# +[[outputs.influxdb_v2]]
# + urls = ["http://127.0.0.1:8086"]
# + bucket = "webmentions"
# + token = "abcdefg"
# + organization = "1ffffaaaa"
# +
# + # drop the routing tag
# + tagexclude = ["influxdb_database"]
# + [outputs.influxdb_v2.tagpass]
# + influxdb_database = ["webmentions"]
# +
# +```
# +
# +----
# +
# +### Copyright
# +
# +Copyright (c) 2022 [Ben Tasker](https://www.bentasker.co.uk)
# +
# +Released under [MIT License](https://www.bentasker.co.uk/pages/licenses/mit-license.html)
#
diff --git a/webmention_io/webmention_io.py b/webmention_io/webmention_io.py
--- a/webmention_io/webmention_io.py
+++ b/webmention_io/webmention_io.py
# @@ -0,0 +1,111 @@
# +#!/usr/bin/env python3
# +#
# +# Call the webmention.io API and retrieve recent webmentions
# +#
# +# These are then converted to Line Protocol for writing into InfluxDB
# +#
# +# Warning: This generates a high cardinality dataset, you will almost
# +# certainly want to implement downsampling if you're only interested
# +# in aggregate stats
# +#
# +# pip3 install requests python-dateutil
# +#
# +'''
# +Copyright (c) 2022 B Tasker
# +
# +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# +
# +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
# +
# +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# +'''
# +
# +import datetime as dt
# +import requests
# +
# +from dateutil.parser import parse
# +
# +# The measurement to write mentions into
# +MEASUREMENT = "webmentions"
# +
# +# How far back should we tell the API to search?
# +MINUTES = 60
# +
# +# A list of your API tokens, you'll have 1 per site that you've set up
# +# on webmention.io
# +TOKENS = [""]
# +
# +def build_lp(entry):
# + ''' Build a single InfluxDB line-protocol record describing one webmention.
# +
# + entry: one item from the webmention.io jf2 "children" list.
# + Returns a string of the form "<measurement>,<tags> <fields> <ns timestamp>".
# + '''
# +
# + # Escape spaces in the author tag value for line protocol.
# + # NOTE(review): '\ ' relies on Python preserving the unrecognised escape
# + # (it yields backslash+space); '\\ ' would be explicit. Commas in names
# + # are not escaped -- confirm author names never contain them.
# + author = entry['author']['name'].replace(' ', '\ ')
# + # Strip double quotes so the value is safe inside a quoted field below.
# + author_url = entry['author']['url'].replace('"', '')
# +
# + # Convert time to nanosecond epoch
# + do = dt.datetime.strptime(entry['wm-received'], '%Y-%m-%dT%H:%M:%SZ')
# +
# + # Prefer the author-declared publication time when the API supplies one.
# + if "published" in entry and entry['published']:
# + do = parse(entry['published'])
# +
# +
# + # NOTE(review): '%s' is a platform-specific strftime extension (glibc)
# + # and converts via the local timezone -- confirm the host runs UTC.
# + mention_date = str(int(do.strftime('%s')) * 1000000000)
# +
# + # ID and type
# + wm_id = entry['wm-id']
# + wm_type = entry['wm-property']
# +
# + # Linked URL
# + # Drop any #fragment so mentions of page anchors group under one URL tag.
# + url = entry['wm-target'].split("#")[0]
# +
# + # Where they linked from
# + source_url = entry['url'].replace('"', '')
# +
# +
# + # Start putting it all together
# + # influxdb_database is a routing tag consumed by Telegraf tagpass/tagdrop
# + # (see the README's "Routed Output" section) and dropped before writing.
# + tagset = [
# + MEASUREMENT,
# + f'type={wm_type}',
# + f'url={url}',
# + f'author={author}',
# + 'influxdb_database=webmentions'
# + ]
# +
# + fieldset = [
# + f'id={wm_id}',
# + f'author_url="{author_url}"',
# + f'linked_from="{source_url}"'
# + ]
# +
# + # Text-bearing mentions: keep the first 1000 chars, remove quotes and
# + # newlines, and double backslashes so the field stays one valid LP line.
# + if "content" in entry:
# + content = entry['content']['text'][0:1000].replace('"', '').replace('\n', ' ').replace('\r', ' ').replace('\\','\\\\')
# + fieldset.append(f'content="{content}"')
# +
# + # Put it all back together and return
# + return ','.join(tagset) + " " + ','.join(fieldset) + f" {mention_date}"
# +
# +
# +def main():
# + ''' Poll the webmention.io API for each configured token and print one
# + line-protocol record per mention (Telegraf exec input format).
# + '''
# + # Ask the API only for mentions received within the last MINUTES minutes.
# + # NOTE(review): now() is local time but the format string appends 'Z'
# + # (UTC) -- confirm hosts run UTC or the search window will be skewed.
# + now = dt.datetime.now()
# + d = dt.timedelta(minutes = MINUTES)
# + a = now - d
# + since = a.strftime('%Y-%m-%dT%H:%M:%SZ')
# +
# +
# + # One token per site registered with webmention.io
# + for token in TOKENS:
# + # Call the API
# + # NOTE(review): no timeout or status-code check -- a hung or failed
# + # request is only bounded by Telegraf's exec plugin timeout.
# + r = requests.get(f'https://webmention.io/api/mentions.jf2?token={token}&since={since}')
# + d = r.json()
# +
# + # No mentions in the window (or an error payload): skip this site.
# + if "children" not in d:
# + continue
# +
# + # Iterate over the result
# + for entry in d['children']:
# + # Print the LP
# + print(build_lp(entry))
# +
# +
# +main()
#