From 83a67c06183029095d4759fb64396f058a70c93a Mon Sep 17 00:00:00 2001
From: Steven Ly <6807939+orcsly@users.noreply.github.com>
Date: Fri, 16 Oct 2020 12:58:50 -0700
Subject: [PATCH] DIV-1192: Add link checker.

---
Review notes (this area is not part of the commit message):
* link_check.py was revised during review: the reported file name no longer
  reads a Path-only attribute from a plain string, template files and HTTP
  responses are closed, requests have a timeout, and each URL is requested
  only once. The blob id on its "index" line still names the first revision.
* linkcheck.yml: "*/2 * * * *" asks for a run every two minutes, while GitHub
  documents five minutes as the shortest supported schedule interval.
  TODO confirm the intended schedule before merging.

 .github/workflows/linkcheck.yml               | 28 ++++++
 .gitignore                                    |  1 +
 .../core/management/commands/link_check.py    | 88 +++++++++++++++++++
 requirements.txt                              |  2 +
 4 files changed, 119 insertions(+)
 create mode 100644 .github/workflows/linkcheck.yml
 create mode 100644 edivorce/apps/core/management/commands/link_check.py

diff --git a/.github/workflows/linkcheck.yml b/.github/workflows/linkcheck.yml
new file mode 100644
index 00000000..948f1225
--- /dev/null
+++ b/.github/workflows/linkcheck.yml
@@ -0,0 +1,28 @@
+name: eDivorce - Link Check
+
+on:
+  schedule:
+    - cron: "*/2 * * * *"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 4
+      matrix:
+        python-version: [3.6]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install Dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+    - name: Run Link Checker
+      run: |
+        python manage.py link_check
diff --git a/.gitignore b/.gitignore
index 38c3a5e0..df78405b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ var/
 .installed.cfg
 *.egg
 .python-version
+pyvenv.cfg
 
 # PyInstaller
 # Usually these files are written by a python script from a template
diff --git a/edivorce/apps/core/management/commands/link_check.py b/edivorce/apps/core/management/commands/link_check.py
new file mode 100644
index 00000000..6d8dcebe
--- /dev/null
+++ b/edivorce/apps/core/management/commands/link_check.py
@@ -0,0 +1,88 @@
+import os
+import sys
+from urllib.request import urlopen
+
+from bs4 import BeautifulSoup
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+
+# Statuses reported as broken when a response object is returned at all.
+# urlopen() normally raises HTTPError for them, which _check_link also reports.
+ERROR_STATUSES = (400, 404, 403, 408, 409, 501, 502, 503)
+
+# Seconds to wait for each request, so one unresponsive host cannot stall
+# the whole run.
+REQUEST_TIMEOUT = 30
+
+
+class Command(BaseCommand):
+    """Report external links in the core templates that cannot be fetched.
+
+    Every ``.html`` file below ``apps/core/templates`` is parsed and each
+    ``<a href>`` that points at an ``http(s)`` URL is requested.  Failures
+    are printed and the process exits with status 1.
+    """
+
+    help = 'Checks links in the eDivorce application.'
+
+    def _check_link(self, address):
+        """Request ``address`` and describe the failure, if there is one.
+
+        Returns ``None`` when the link could be fetched, otherwise a short
+        error string.
+        """
+        try:
+            with urlopen(address, timeout=REQUEST_TIMEOUT) as resp:
+                if resp.status in ERROR_STATUSES:
+                    return f"{resp.status} - {resp.reason}"
+        except Exception as e:
+            # Deliberately broad: HTTP errors, DNS failures, timeouts and
+            # malformed URLs all mean the link is unusable.
+            return f"{e}"
+        return None
+
+    def _external_links(self, file_path):
+        """Return the ``http(s)`` hrefs of every anchor in a template."""
+        with open(file_path, encoding='utf-8') as template:
+            soup = BeautifulSoup(template, 'html.parser')
+        return [
+            anchor['href']
+            for anchor in soup.find_all('a', href=True)
+            if anchor['href'].startswith(('http://', 'https://'))
+        ]
+
+    def handle(self, *args, **options):
+        """Check every template link; print failures and exit 1 if any."""
+        template_dir = os.path.join(
+            str(settings.BASE_DIR), 'apps', 'core', 'templates')
+        errors = []
+        # Templates repeat many links; request each URL only once.
+        results = {}
+
+        for root, _dirs, names in os.walk(template_dir):
+            for name in names:
+                if not name.endswith('.html'):
+                    continue
+                file_path = os.path.join(root, name)
+                # file_path is a plain string (no ``.name`` attribute), so
+                # report it relative to the template directory.
+                filename = os.path.relpath(file_path, template_dir)
+
+                for href in self._external_links(file_path):
+                    if href not in results:
+                        results[href] = self._check_link(href)
+                    if results[href]:
+                        errors.append({
+                            'link': href,
+                            'error': results[href],
+                            'file': filename,
+                        })
+
+        if errors:
+            for error in errors:
+                print('-------------------------------------------------------------')
+                print(f'File: {error["file"]}')
+                print(f'link: {error["link"]}')
+                print(f'Error: {error["error"]}\r\n')
+            sys.exit(1)
diff --git a/requirements.txt b/requirements.txt
index 31d972b5..d468105f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 aniso8601==7.0.0
+beautifulsoup4==4.9.3
 certifi==2020.6.20
 chardet==3.0.4
 clamd==1.0.2
@@ -31,6 +32,7 @@ rjsmin==1.1.0
 Rx==1.6.1
 singledispatch==3.4.0.3
 six==1.15.0
+soupsieve==2.0.1
 sqlparse==0.3.1
 Unidecode==1.1.1
 Unipath==1.1