217 lines
9.3 KiB
Python
217 lines
9.3 KiB
Python
#!/usr/bin/env python3
|
|
# SPDX-FileCopyrightText: 2023 Tobias Diekershoff
|
|
#
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
"""
|
|
This script can be used to create server block lists by combining the blocklists
|
|
of any number of other Friendica instances.
|
|
|
|
See https://git.friendi.ca/tobias/brewserverblocklist
|
|
"""
|
|
import argparse
|
|
import configparser
|
|
import sys
|
|
from os.path import exists
|
|
import requests
|
|
|
|
class BParser(argparse.ArgumentParser):
    """
    ArgumentParser variant that shows the full --help text whenever argument
    parsing fails (for example when the script is started without arguments).

    The approach follows a StackOverflow answer from 2010 by unutbu, who
    referred to a reply from Steven Bethard as the source of the code.

    https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu/4042861#4042861
    """

    def error(self, message):
        """
        Report a parsing problem and terminate.

        The message goes to STDERR, the help text is printed afterwards and
        the process exits with status 2.
        """
        print('error: %s\n' % message, end='', file=sys.stderr)
        self.print_help()
        raise SystemExit(2)
|
|
|
|
class BrewBlocklist():
    """
    This is the cauldron that is used to

    * collect the blocklists from other servers
    * rank the blocklists by their trustworthiness
    * compile the resulting blocklist
    """

    # Seconds to wait for a remote server before giving up on it, so that a
    # single unresponsive node cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self, configfile, outputfile, auto_accept = False,
                 auto_accept_direction = True, confidence = 100):
        """
        Initialise the cauldron with the filenames of the config file
        and the outputfilename.

        Every section of the config file except "safe harbor" describes one
        source server: the section name is the host name (without protocol),
        "trust" is its integer trust value and the optional "type" defaults
        to "friendica". The "safe harbor" section holds a comma separated
        "domains" list of entries that must never end up in the result.

        The script exits with status 1 if a section name contains a
        protocol prefix. A source section without "trust" raises KeyError.
        """
        self.sources = []
        self.auto_accept = auto_accept
        self.auto_accept_direction = auto_accept_direction
        self.safe_harbor = []
        self.error = []
        self.outputfile = outputfile
        self.confidence = confidence
        self.blocklist = {}
        self.reasons = {}
        config = configparser.RawConfigParser()
        config.read(configfile)
        for section in config.sections():
            section_values = dict(config.items(section))
            if 'http://' in section or 'https://' in section:
                print('The section name in the config file must not contain the protocol ({})'.format(section))
                sys.exit(1)
            if section == 'safe harbor':
                # Strip the whitespace that usually follows the commas,
                # otherwise "a.example, b.example" would never match
                # "b.example" in clean_list.
                for item in section_values.get('domains', '').split(','):
                    item = item.strip()
                    if item:
                        self.safe_harbor.append(item)
                continue
            self.sources.append({
                'url': section,
                'trust': int(section_values['trust']),
                'type': section_values.get('type', 'friendica')
            })

    def _record(self, pattern, reason, trust):
        """
        Add the trust value to the running total of the pattern and remember
        the reason if it is the first mention of the pattern.
        """
        self.blocklist[pattern] = self.blocklist.get(pattern, 0) + trust
        self.reasons.setdefault(pattern, reason)

    def _add_friendica_csv(self, text, trust):
        """
        Feed the CSV text of a Friendica blocklist into the cauldron.

        The first column is the blocked pattern, everything after the first
        comma is kept verbatim as the reason, so reasons containing commas
        survive. Lines without a pattern or without a comma (e.g. the empty
        last line) are skipped.
        """
        for line in text.splitlines():
            pattern, separator, reason = line.partition(',')
            pattern = pattern.strip()
            if not separator or not pattern:
                continue
            self._record(pattern, reason.strip(), trust)

    def _add_mastodon_blocks(self, items, trust):
        """
        Feed the decoded JSON list of a Mastodon domain_blocks API reply into
        the cauldron. Raises KeyError or TypeError on unexpected structures.
        """
        for item in items:
            self._record(item['domain'], item.get('comment'), trust)

    def collect_ingrediens(self):
        """
        Use requests.get to collect the blocklists from the nodes provided
        in the config file. Collect the reasons why to block the servers (the
        first mention of the server wins) and sum the trust levels of the
        blocks.

        A source that cannot be reached or answers with garbage is noted in
        self.error and skipped; the remaining sources are still processed.
        Raises ValueError for a source with an unsupported type.
        """
        for source in self.sources:
            if source['type'] == 'friendica':
                # Friendica publishes the blocklist as CSV file
                url = 'https://{}/blocklist/domain/download'.format(source['url'])
            elif source['type'] == 'mastodon':
                # Mastodon has an API endpoint that contains the information
                url = 'https://{}/api/v1/instance/domain_blocks'.format(source['url'])
            else:
                raise ValueError('{} is not a supported node type, check your config file'.format(source['type']))
            try:
                requ = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException:
                self.error.append('The request to {} failed'.format(source['url']))
                continue
            if requ.status_code != requests.codes.ok:
                self.error.append('The request to {} failed'.format(source['url']))
                continue
            if source['type'] == 'friendica':
                self._add_friendica_csv(requ.text, source['trust'])
                continue
            try:
                self._add_mastodon_blocks(requ.json(), source['trust'])
            except (ValueError, KeyError, TypeError):
                # ValueError covers undecodable JSON, the other two cover
                # JSON that does not have the expected list-of-dicts shape.
                self.error.append('{} returned no valid json to the API call'.format(source['url']))

    def clean_list(self):
        """
        Go through the list of blocklist items and ask the user if they want
        to keep the item or not.

        Entries from the safe harbor are always dropped, entries whose total
        trust reaches the confidence level are always kept. For the rest the
        user is asked, unless the --auto-accept or --auto-decline commandline
        parameter was given, in which case that decision is applied.
        """
        c_blocklist = {}
        c_reasons = {}
        for key, value in self.blocklist.items():
            if key in self.safe_harbor:
                continue
            if value < self.confidence:
                if self.auto_accept:
                    keep = bool(self.auto_accept_direction)
                else:
                    print('Domain: {} [total trust {}]'.format(key, value))
                    print('Reason: {}'.format(self.reasons[key]))
                    answer = input('Keep that entry? [Y/n] > ')
                    keep = answer.strip().lower() not in ('n', 'no')
                if not keep:
                    continue
            c_blocklist[key] = value
            c_reasons[key] = self.reasons[key]
        self.blocklist = c_blocklist
        self.reasons = c_reasons

    def _write_blocklist(self, stream):
        """
        Write one "pattern, reason, trust" line per blocklist entry to the
        given text stream. Missing reasons are replaced by a placeholder and
        noted in self.error.
        """
        for key, value in self.blocklist.items():
            reason = self.reasons.get(key)
            if not isinstance(reason, str):
                reason = '"no reason given"'
                self.error.append("for {} no blocking reason was provided".format(key))
            elif ("," in reason or " " in reason) and not reason.startswith('"'):
                # Quote the field and double embedded quotes so the line
                # stays a valid CSV record.
                reason = '"{}"'.format(reason.replace('"', '""'))
            self.reasons[key] = reason
            print('{}, {}, {}'.format(key, reason, value), file=stream)

    def serve_meal(self):
        """
        Print the CSV list of the collected blocklist into either STDOUT or
        the output file that was defined as command line parameter.

        You can upload the resulting CSV file into Friendica from the admin
        panel of your node. Only the 1st and 2nd column is important. The 3rd
        column contains the total trust value for the blocklist entry.

        Problems collected while brewing are printed to STDOUT afterwards.
        """
        if self.outputfile:
            # The context manager closes the file even if writing fails.
            with open(self.outputfile, 'w', encoding='utf-8') as out_file:
                self._write_blocklist(out_file)
        else:
            self._write_blocklist(sys.stdout)
        if self.error:
            print("\n\nWhile creating the blocklist the following problems occured:")
            print("\n".join(self.error))
|
|
|
|
def main():
    """
    Run the script from the command line.

    * parse the command line arguments
    * make sure the config file exists (exit with status 1 otherwise)
    * put the cauldron on the fireplace
    * collect the ingredients
    * let the user (or the auto-accept/decline switch) clean the list
    * serve the result
    """
    cli = BParser()
    cli.add_argument(
        '-c', '--config',
        dest='configfile',
        required=True,
        help='specify the configuration file',
    )
    cli.add_argument(
        '-o', '--output',
        dest='outputfile',
        default=None,
        help='specify the output file. STDOUT if none given',
    )
    # Both switches share one destination: None means "ask the user",
    # True/False is the answer that is applied automatically.
    cli.add_argument(
        '-y', '--auto-accept',
        dest='auto_accept_direction',
        action='store_true',
        default=None,
        help='accept all blocklist items with trust values < confidence',
    )
    cli.add_argument(
        '-n', '--auto-decline',
        dest='auto_accept_direction',
        action='store_false',
        default=None,
        help='decline all blocklist items with trust values < confidence',
    )
    cli.add_argument(
        '-C', '--confidence',
        type=int,
        dest='confidence',
        default=100,
        help='set the needed confidence level for automatically keep item',
    )
    options = cli.parse_args()

    direction = options.auto_accept_direction
    answer_automatically = direction is not None

    if not exists(options.configfile):
        print('The config file {} was not found.'.format(options.configfile))
        sys.exit(1)

    cauldron = BrewBlocklist(
        options.configfile,
        options.outputfile,
        answer_automatically,
        direction,
        options.confidence,
    )
    cauldron.collect_ingrediens()
    cauldron.clean_list()
    cauldron.serve_meal()


if __name__ == '__main__':
    main()
|