brewserverblocklist/src/brewserverblocklist/brewserverblocklist.py

217 lines
9.3 KiB
Python

#!env python3
# SPDX-FileCopyrightText: 2023 Tobias Diekershoff
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""
This script can be used to create server block lists by combining the blocklists
of any number of other Friendica instances.
See https://git.friendi.ca/tobias/brewserverblocklist
"""
import argparse
import configparser
import sys
from os.path import exists
import requests
class BParser(argparse.ArgumentParser):
    """
    ArgumentParser variant that shows the full --help text whenever argument
    parsing fails (e.g. the script is called without any arguments).

    Based on a StackOverflow answer from 2010 by unutbu, who referred to a
    reply from Steven Bethard as the source of the code.
    https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu/4042861#4042861
    """

    def error(self, message):
        """
        Report *message* on STDERR, print the help text and terminate the
        process with exit status 2.
        """
        report = 'error: %s\n' % message
        sys.stderr.write(report)
        self.print_help()
        raise SystemExit(2)
class BrewBlocklist():
    """
    This is the cauldron that is used to
    * collect the blocklists from other servers
    * rank the blocklists by their trustworthiness
    * compile the resulting blocklist
    """

    # Seconds to wait for a remote node before the request is treated as
    # failed; without a timeout a single unresponsive node can hang the run.
    REQUEST_TIMEOUT = 30

    def __init__(self, configfile, outputfile, auto_accept = False,
                 auto_accept_direction = True, confidence = 100):
        """
        Initialise the cauldron from the config file.

        Every section of the config file names one source node (host name
        without protocol) and must provide a ``trust`` value; ``type`` is
        optional and defaults to ``friendica``. The special section
        ``safe harbor`` holds a comma separated ``domains`` list of entries
        that must never end up in the resulting blocklist.

        Parameters:
            configfile: path of the config file to read.
            outputfile: path of the CSV file to write, or a false value to
                print the result to STDOUT.
            auto_accept: when true, the user is not asked about entries
                below the confidence level.
            auto_accept_direction: with auto_accept, true keeps and false
                drops the entries below the confidence level.
            confidence: summed trust value from which on an entry is kept
                without asking.

        Exits the process with status 1 if a section name contains
        ``http://`` or ``https://``.
        """
        self.sources = []        # list of {'url', 'trust', 'type'} dicts
        self.auto_accept = auto_accept
        self.auto_accept_direction = auto_accept_direction
        self.safe_harbor = []    # domains that are never blocked
        self.error = []          # problems collected for the final report
        self.outputfile = outputfile
        self.confidence = confidence
        self.blocklist = {}      # domain -> summed trust
        self.reasons = {}        # domain -> first reason seen
        config = configparser.RawConfigParser()
        config.read(configfile)
        for section in config.sections():
            section_values = dict(config.items(section))
            if 'http://' in section or 'https://' in section:
                print('The section name in the config file must not contain the protocol ({})'.format(section))
                sys.exit(1)
            if section == 'safe harbor':
                # Strip the entries so that "a.tld, b.tld" yields "b.tld" and
                # not " b.tld", which would never match a blocklist domain.
                for item in section_values.get('domains', '').split(','):
                    domain = item.strip()
                    if domain:
                        self.safe_harbor.append(domain)
                continue
            self.sources.append({
                'url': section,
                'trust': int(section_values['trust']),
                'type': section_values.get('type', 'friendica')
            })

    def collect_ingrediens(self):
        """
        Use requests.get to collect the blocklists from the nodes provided
        in the config file. Collect the reasons why to block the servers (the
        first mention of the server wins) and sum the trust levels of the
        blocks.

        A source that cannot be fetched or parsed is recorded in self.error
        and skipped; the remaining sources are still processed.

        Raises:
            ValueError: if a source has a type other than 'friendica' or
                'mastodon'.
        """
        for source in self.sources:
            node_type = source['type']
            if node_type == 'friendica':
                # Friendica publishes the blocklist as CSV file
                url = 'https://{}/blocklist/domain/download'.format(source['url'])
            elif node_type == 'mastodon':
                # Mastodon has an API endpoint that contains the information
                url = 'https://{}/api/v1/instance/domain_blocks'.format(source['url'])
            else:
                raise ValueError('{} is not a supported node type, check your config file'.format(source['type']))
            response = self._fetch(url, source)
            if response is None:
                continue
            if node_type == 'friendica':
                self._add_friendica_csv(response.text, source['trust'])
            else:
                self._add_mastodon_json(response, source)

    def _fetch(self, url, source):
        """Return the response for url, or None (recording an error) on failure."""
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException:
            self.error.append('The request to {} failed'.format(source['url']))
            return None
        if response.status_code != requests.codes.ok:
            self.error.append('The request to {} failed'.format(source['url']))
            return None
        return response

    def _record(self, domain, reason, trust):
        """Add trust to the domain's total and keep the first reason seen."""
        self.blocklist[domain] = self.blocklist.get(domain, 0) + trust
        self.reasons.setdefault(domain, reason)

    def _add_friendica_csv(self, text, trust):
        """Ingest 'pattern,reason' lines of a Friendica CSV export."""
        for line in text.splitlines():
            # Split on the first comma only: the reason may contain commas
            # itself. It is kept verbatim (including surrounding quotes), so
            # serve_meal can write it back without quoting it a second time.
            pattern, separator, reason = line.partition(',')
            if not separator:
                # blank or malformed line, e.g. the empty last line
                continue
            self._record(pattern, reason, trust)

    def _add_mastodon_json(self, response, source):
        """Ingest the JSON list returned by a Mastodon domain_blocks call."""
        try:
            # Extract everything first so that a malformed reply does not
            # leave a partially ingested source behind.
            entries = [(item['domain'], item['comment']) for item in response.json()]
        except (ValueError, KeyError, TypeError):
            self.error.append('{} returned no valid json to the API call'.format(source['url']))
            return
        for domain, comment in entries:
            self._record(domain, comment, source['trust'])

    def clean_list(self):
        """
        Go through the list of blocklist items and ask the user if they want
        to keep the item or not.

        Entries listed in the safe harbor are always dropped; entries whose
        summed trust reaches the confidence level are always kept. For the
        remaining entries the interaction is skipped when the user provided
        either the --auto-accept or the --auto-decline commandline parameter.
        """
        kept = {}
        for domain, trust in self.blocklist.items():
            if domain in self.safe_harbor:
                continue
            if trust >= self.confidence or self._keep_uncertain(domain, trust):
                kept[domain] = trust
        self.reasons = {domain: self.reasons[domain] for domain in kept}
        self.blocklist = kept

    def _keep_uncertain(self, domain, trust):
        """Decide, automatically or by asking, about an entry below confidence."""
        if self.auto_accept:
            return bool(self.auto_accept_direction)
        print('Domain: {} [total trust {}]'.format(domain, trust))
        print('Reason: {}'.format(self.reasons[domain]))
        answer = input('Keep that entry? [Y/n] > ')
        return answer not in ('n', 'N')

    def serve_meal(self):
        """
        Print the CSV list of the collected blocklist into either STDOUT or
        the output file that was defined as command line parameter.

        You can upload the resulting CSV file into Friendica from the admin
        panel of your node. Only the 1st and 2nd column is important. The 3rd
        column contains the total trust value for the blocklist entry.

        Problems collected in self.error are printed to STDOUT afterwards.
        """
        if self.outputfile:
            # The context manager closes the file even if writing fails, and
            # STDOUT does not have to be swapped out globally.
            with open(self.outputfile, 'w', encoding='utf-8') as out_file:
                self._write_rows(out_file)
        else:
            self._write_rows(sys.stdout)
        if self.error:
            print("\n\nWhile creating the blocklist the following problems occured:")
            print("\n".join(self.error))

    def _write_rows(self, stream):
        """Write one 'domain, reason, trust' line per entry to stream."""
        for key, value in self.blocklist.items():
            reason = self.reasons[key]
            if not isinstance(reason, str):
                # e.g. a Mastodon block without a comment (null in the JSON)
                self.reasons[key] = '"no reason given"'
                self.error.append("for {} no blocking reason was provided".format(key))
            elif ("," in reason or " " in reason) and not reason.startswith('"'):
                self.reasons[key] = '"{}"'.format(reason)
            print('{}, {}, {}'.format(key, self.reasons[key], value), file=stream)
def main():
    """
    Entry point of the script.
    * parse the command line arguments
    * make sure the config file exists (exit status 1 otherwise)
    * set up the cauldron
    * collect the blocklists, clean the list and write the result
    """
    option_table = (
        (('-c', '--config'),
         {'dest': 'configfile',
          'required': True,
          'help': 'specify the configuration file'}),
        (('-o', '--output'),
         {'dest': 'outputfile',
          'default': None,
          'help': 'specify the output file. STDOUT if none given'}),
        # -y and -n share one destination: None means neither was given.
        (('-y', '--auto-accept'),
         {'dest': 'auto_accept_direction',
          'action': 'store_true',
          'default': None,
          'help': 'accept all blocklist items with trust values < confidence'}),
        (('-n', '--auto-decline'),
         {'dest': 'auto_accept_direction',
          'action': 'store_false',
          'default': None,
          'help': 'decline all blocklist items with trust values < confidence'}),
        (('-C', '--confidence'),
         {'type': int,
          'dest': 'confidence',
          'default': 100,
          'help': 'set the needed confidence level for automatically keep item'}),
    )
    cli = BParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    options = cli.parse_args()

    if not exists(options.configfile):
        print('The config file {} was not found.'.format(options.configfile))
        sys.exit(1)

    direction = options.auto_accept_direction
    cauldron = BrewBlocklist(options.configfile, options.outputfile,
                             direction is not None, direction,
                             options.confidence)
    cauldron.collect_ingrediens()
    cauldron.clean_list()
    cauldron.serve_meal()
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()