brewserverblocklist/src/brewserverblocklist/brewserverblocklist.py

#!env python3
# SPDX-FileCopyrightText: 2023 Tobias Diekershoff
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""
This script can be used to create server block lists by combining the blocklists
of any number of other Friendica instances.

See https://git.friendi.ca/tobias/brewserverblocklist
"""
import argparse
import configparser
import sys
from os.path import exists
import requests

class BParser(argparse.ArgumentParser):
    """
    Argument parser that shows the full --help output whenever parsing
    fails (e.g. when the script is called without any arguments).

    The approach follows a StackOverflow answer from 2010 by unutbu, who
    credits a reply from Steven Bethard as the source of the code.

    https://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu/4042861#4042861
    """
    def error(self, message):
        """
        Report ``message`` on STDERR, print the help text and terminate
        the script with exit status 2.
        """
        print('error: {}'.format(message), file=sys.stderr)
        self.print_help()
        raise SystemExit(2)

class BrewBlocklist():
    """
    This is the cauldron that is used to

    * collect the blocklists from other servers
    * rank the blocklists by their trustworthiness
    * compile the resulting blocklist

    Attributes:
        sources: list of dicts with the keys 'url', 'trust' and 'type', one
            per server section of the config file.
        safe_harbor: domains that must never end up in the blocklist.
        blocklist: maps a blocked domain pattern to its summed trust value.
        reasons: maps a blocked domain pattern to the blocking reason.
        error: human readable problems collected along the way; they are
            reported at the end of serve_meal().
    """
    # Seconds to wait for an answer from a remote server. Without a timeout
    # a single unresponsive server would stall the whole run.
    request_timeout = 30

    def __init__(self, configfile, outputfile, auto_accept=False,
                 auto_accept_direction=True, confidence=100):
        """
        Initialise the cauldron with the filenames of the config file
        and the outputfilename.

        Every section of the config file names a source server (without the
        protocol) and must provide a 'trust' value; 'type' is optional and
        defaults to 'friendica'. The special section 'safe harbor' lists, in
        its comma separated 'domains' option, the domains that are never
        blocked.

        Args:
            configfile: path of the configuration file.
            outputfile: path of the CSV file to write, or None for STDOUT.
            auto_accept: if True, never ask the user about entries below the
                confidence level.
            auto_accept_direction: with auto_accept, keep (True) or drop
                (False) the entries below the confidence level.
            confidence: summed trust an entry needs to be kept unasked.

        Raises:
            KeyError: a source section has no 'trust' option.
            ValueError: a 'trust' option is not an integer.
            SystemExit: a section name contains the protocol.
        """
        self.sources = []
        self.auto_accept = auto_accept
        self.auto_accept_direction = auto_accept_direction
        self.safe_harbor = []
        self.error = []
        self.outputfile = outputfile
        self.confidence = confidence
        self.blocklist = {}
        self.reasons = {}
        config = configparser.RawConfigParser()
        config.read(configfile)
        for section in config.sections():
            section_values = dict(config.items(section))
            if 'http://' in section or 'https://' in section:
                print('The section name in the config file must not contain the protocol ({})'.format(section))
                sys.exit(1)
            if section == 'safe harbor':
                # Strip the entries so that "a.example, b.example" protects
                # both domains, and ignore empty entries (trailing comma).
                for item in section_values.get('domains', '').split(','):
                    item = item.strip()
                    if item:
                        self.safe_harbor.append(item)
                continue
            self.sources.append({
                'url': section,
                'trust': int(section_values['trust']),
                'type': section_values.get('type', 'friendica'),
            })

    def collect_ingrediens(self):
        """
        Use requests.get to collect the blocklists from the nodes provided
        in the config file. Collect the reasons why to block the servers (the
        first mention of the server wins) and sum the trust levels of the
        blocks.

        A source that cannot be reached or returns unusable data is noted in
        self.error and skipped; the remaining sources are still processed.

        Raises:
            ValueError: a source has a type other than 'friendica' or
                'mastodon'.
        """
        for source in self.sources:
            if source['type'] == 'friendica':
                self._collect_friendica(source)
            elif source['type'] == 'mastodon':
                self._collect_mastodon(source)
            else:
                raise ValueError('{} is not a supported node type, check your config file'.format(source['type']))

    def _fetch(self, source, path):
        """
        Request https://<source url><path> and return the response, or None
        (after noting the failure in self.error) if the request failed.
        """
        try:
            requ = requests.get('https://{}{}'.format(source['url'], path),
                                timeout=self.request_timeout)
        except requests.exceptions.RequestException:
            # DNS failures, refused connections, timeouts, TLS problems, ...
            self.error.append('The request to {} failed'.format(source['url']))
            return None
        if requ.status_code != requests.codes.ok:
            self.error.append('The request to {} failed'.format(source['url']))
            return None
        return requ

    def _record(self, pattern, reason, trust):
        """Add trust to the entry for pattern; the first reason seen is kept."""
        self.blocklist[pattern] = self.blocklist.get(pattern, 0) + trust
        self.reasons.setdefault(pattern, reason)

    @staticmethod
    def _parse_friendica_csv(text):
        """
        Yield (pattern, reason) for every line of a downloaded CSV blocklist.

        Only the first comma separates the two columns, so a reason that
        itself contains commas is kept in one piece (including its quotes).
        Lines without a comma, such as the empty last line, are skipped.
        """
        for line in text.split('\n'):
            # strip() also removes the '\r' of CRLF line endings
            pattern, separator, reason = line.strip().partition(',')
            if separator:
                yield pattern, reason

    def _collect_friendica(self, source):
        """Collect the blocklist that a Friendica node publishes as CSV file."""
        requ = self._fetch(source, '/blocklist/domain/download')
        if requ is None:
            return
        for pattern, reason in self._parse_friendica_csv(requ.text):
            self._record(pattern, reason, source['trust'])

    def _collect_mastodon(self, source):
        """Collect the blocklist from the API endpoint of a Mastodon node."""
        requ = self._fetch(source, '/api/v1/instance/domain_blocks')
        if requ is None:
            return
        try:
            entries = [(item['domain'], item['comment']) for item in requ.json()]
        except (ValueError, KeyError, TypeError):
            # ValueError: the body is no JSON at all; KeyError / TypeError:
            # it is JSON but not a list of objects with the expected keys.
            self.error.append('{} returned no valid json to the API call'.format(source['url']))
            return
        for domain, comment in entries:
            self._record(domain, comment, source['trust'])

    def clean_list(self):
        """
        Go through the list of blocklist items and ask the user if they want
        to keep the item or not.

        Items listed in the safe harbor are always dropped, items whose total
        trust reaches the confidence level are always kept.

        Interaction will be over-written when the user provided either the
        --auto-accept or --auto-decline commandline parameter.
        """
        safe_harbor = set(self.safe_harbor)
        c_blocklist = {}
        c_reasons = {}
        for key, value in self.blocklist.items():
            if key in safe_harbor:
                continue
            if value < self.confidence and not self._keep_uncertain(key, value):
                continue
            c_blocklist[key] = value
            c_reasons[key] = self.reasons[key]
        self.blocklist = c_blocklist
        self.reasons = c_reasons

    def _keep_uncertain(self, key, value):
        """
        Decide whether an entry below the confidence level is kept, either
        from the auto accept settings or by asking the user.
        """
        if self.auto_accept:
            return bool(self.auto_accept_direction)
        print('Domain: {} [total trust {}]'.format(key, value))
        print('Reason: {}'.format(self.reasons[key]))
        keep = input('Keep that entry? [Y/n] > ')
        # everything but an explicit "n" counts as yes
        return keep not in ['n', 'N']

    def serve_meal(self):
        """
        Print the CSV list of the collected blocklist into either STDOUT or
        the output file that was defined as command line parameter.

        You can upload the resulting CSV file into Friendica from the admin
        panel of your node. Only the 1st and 2nd column is important. The 3rd
        column contains the total trust value for the blocklist entry.

        The problems collected in self.error are printed to STDOUT after the
        list has been written.
        """
        if self.outputfile:
            # The with block closes the file even if writing a row fails;
            # UTF-8 keeps non-ASCII reasons independent of the locale.
            with open(self.outputfile, 'w', encoding='utf-8') as out_file:
                self._write_rows(out_file)
        else:
            self._write_rows(sys.stdout)
        if self.error:
            print("\n\nWhile creating the blocklist the following problems occured:")
            print("\n".join(self.error))

    def _write_rows(self, stream):
        """
        Write one "pattern, reason, trust" line per blocklist entry to stream.

        The stored reason is replaced by the form that was written.
        """
        for key, value in self.blocklist.items():
            reason = self.reasons[key]
            if not isinstance(reason, str):
                # e.g. a Mastodon block without comment
                reason = '"no reason given"'
                self.error.append("for {} no blocking reason was provided".format(key))
            elif ("," in reason or " " in reason) and not reason.startswith('"'):
                # Quote the field and double embedded quotes so that the
                # column stays a single, well-formed CSV field.
                reason = '"{}"'.format(reason.replace('"', '""'))
            self.reasons[key] = reason
            print('{}, {}, {}'.format(key, reason, value), file=stream)

def main():
    """
    Entry point of the script.

    * read the command line options
    * make sure the config file exists
    * set up the cauldron from the options
    * collect the blocklists, clean them and serve the result
    """
    cli = BParser()
    cli.add_argument(
        '-c', '--config', dest='configfile', required=True,
        help='specify the configuration file')
    cli.add_argument(
        '-o', '--output', dest='outputfile', default=None,
        help='specify the output file. STDOUT if none given')
    # -y and -n share one destination: None means "ask the user", True and
    # False mean "keep" and "drop" without asking.
    cli.add_argument(
        '-y', '--auto-accept', dest='auto_accept_direction',
        action='store_true', default=None,
        help='accept all blocklist items with trust values < confidence')
    cli.add_argument(
        '-n', '--auto-decline', dest='auto_accept_direction',
        action='store_false', default=None,
        help='decline all blocklist items with trust values < confidence')
    cli.add_argument(
        '-C', '--confidence', type=int, dest='confidence', default=100,
        help='set the needed confidence level for automatically keep item')
    options = cli.parse_args()

    if not exists(options.configfile):
        print(f'The config file {options.configfile} was not found.')
        sys.exit(1)

    direction = options.auto_accept_direction
    # Giving either -y or -n switches the interactive questions off.
    skip_questions = direction is not None
    cauldron = BrewBlocklist(options.configfile, options.outputfile,
                             skip_questions, direction, options.confidence)
    cauldron.collect_ingrediens()
    cauldron.clean_list()
    cauldron.serve_meal()

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()