from urllib.parse import urlencode as mk_qs
import urllib.request
-import oauth
+import auth
import util
def walk_pages(opener, url):
class ApiInterface:
- def __init__(self, host, path, storage_mgr):
- = host
- self.path = path
- self.opener = urllib.request.build_opener(
- oauth.AuthHandler(storage_mgr, self),
- )
+ def __init__(self, bb_root, auth_handler):
+ self.bb_root = bb_root
+ self.opener = urllib.request.build_opener(auth_handler)
def build_api_url(self, endpoint, query = ''):
return urllib.parse.urlunparse(urllib.parse.ParseResult(
scheme = 'https',
- netloc =,
- path = str(self.path.joinpath(endpoint)),
+ netloc =,
+ path = str(self.bb_root.path.joinpath('learn/api/public', endpoint)),
query = query,
params = '',
fragment = '',
--- /dev/null
+# Copyright 2019, Jakob Cornell
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import base64
+from datetime import timedelta
+import http.server
+import http.cookiejar
+import os
+from pathlib import PurePosixPath
+import urllib.parse
+import urllib.request
+import common
+import util
+ADDRESS = 'localhost'
+PORT = 1081
+ENDPOINT = '/redirection_endpoint'
+TIMEOUT = timedelta(minutes = 30)
+LOGGER = common.LOGGER.getChild('auth')
+class OauthCodeRequestHandler(http.server.BaseHTTPRequestHandler):
+ LANDING_PATH = '/landing'
+ def __init__(self, channel, csrf_token):
+ = channel
+ self.csrf_token = csrf_token
+ self.status = None
+ self.message = None
+ self.caller_val = None
+ def __call__(self, *args):
+ # Yep, `socketserver' invokes the handler by calling it, and the superclass's
+ # code for dispatching requests to the appropriate methods is in the constructor.
+ # If you're confused, it's because it doesn't make any sense.
+ super().__init__(*args)
+ def do_GET(self):
+ url = urllib.parse.urlparse(self.path)
+ params = urllib.parse.parse_qs(url.query)
+ path = PurePosixPath(url.path)
+ if path == PurePosixPath(ENDPOINT):
+ REQD_PARAMS = {'code', 'state'}
+ if params.keys() == REQD_PARAMS and all(len(params[k]) == 1 for k in REQD_PARAMS):
+ [code] = params['code']
+ [token] = params['state']
+ if token == self.csrf_token:
+ self.status = 200
+ self.message = "Success! You may close this page."
+ self._redirect()
+ self.caller_val = code
+ else:
+ self.status = 403
+ self.message = "CSRF token check failed"
+ self._redirect()
+ else:
+ self.status = 400
+ self.message = "Auth server redirect missing required parameters"
+ self._redirect()
+ self.caller_val = AuthError(self.message)
+ elif path == PurePosixPath(self.LANDING_PATH):
+ self.send_response(self.status)
+ self.end_headers()
+ self.wfile.write(self.message.encode('ascii'))
+ if self.caller_val is not None:
+ else:
+ self.send_response(404)
+ self.end_headers()
+ self.wfile.write('Not Found'.encode('ascii'))
+ def _redirect(self):
+ self.send_response(302)
+ self.send_header('Location', self.LANDING_PATH)
+ self.end_headers()
+ def log_message(self, *_):
+ pass
+def _make_pkce_pair():
+    """Generate a fresh PKCE verifier/challenge pair.
+
+    The verifier is 32 random bytes, URL-safe base64 encoded with the `='
+    padding stripped.  The challenge is the SHA-256 digest of the encoded
+    verifier, encoded the same way.  Both are returned as ASCII strings in
+    a `PkcePair(verifier, challenge)' named tuple.
+    """
+    from collections import namedtuple
+    import hashlib
+
+    def b64url_unpadded(raw):
+        # URL-safe base64 without the `=' padding characters
+        return base64.urlsafe_b64encode(raw).strip(b'=')
+
+    PkcePair = namedtuple('PkcePair', ['verifier', 'challenge'])
+    verifier_bytes = b64url_unpadded(os.urandom(32))
+    challenge_bytes = b64url_unpadded(hashlib.sha256(verifier_bytes).digest())
+    return PkcePair(
+        verifier_bytes.decode('ascii'),
+        challenge_bytes.decode('ascii'),
+    )
+class AuthError(Exception):
+ """Raised when the OAuth authorization flow fails or times out."""
+ pass
+class OauthHandler(urllib.request.BaseHandler):
+ # Precede the HTTP error handler
+ handler_order = 750
+ def __init__(self, storage_mgr, api_iface):
+ self.storage_mgr = storage_mgr
+ self.api_iface = api_iface
+ @staticmethod
+ def _set_auth_header(request, token_doc):
+ request.add_unredirected_header(
+ 'Authorization', 'Bearer {}'.format(token_doc['access_token'])
+ )
+ def _handle(self, request):
+ token_doc = self.storage_mgr.get('authInfo')
+ if not token_doc:
+"No stored access token. Requesting a new token.")
+ token_doc = get_access_token(self.storage_mgr, self.api_iface)
+ self.storage_mgr['authInfo'] = token_doc
+ self._set_auth_header(request, token_doc)
+ return request
+ http_request = _handle
+ https_request = _handle
+ def http_error_401(self, request, fp, code, msg, headers):
+"Access token expired or revoked. Requesting a new token.")
+ token_doc = get_access_token(self.storage_mgr, self.api_iface)
+ self.storage_mgr['authInfo'] = token_doc
+ self._set_auth_header(request, token_doc)
+ return, timeout = request.timeout)
+def get_access_token(storage_mgr, api_iface):
+ import json
+ import queue
+ from threading import Thread
+ import webbrowser
+ client_id = storage_mgr['clientId']
+ client_secret = storage_mgr['clientSecret']
+ payload = base64.b64encode(
+ (client_id + ':' + client_secret).encode('utf-8')
+ ).decode('ascii')
+ handlers = [
+ urllib.request.HTTPSHandler(),
+ util.HeaderAddHandler([('Authorization', 'Basic {}'.format(payload))]),
+ ]
+ opener = urllib.request.OpenerDirector()
+ for handler in handlers:
+ opener.add_handler(handler)
+ token_doc = None
+ if 'authInfo' in storage_mgr:
+ refresh_token = storage_mgr['authInfo']['refresh_token']
+ params = {
+ 'grant_type': 'refresh_token',
+ 'refresh_token': refresh_token,
+ }
+ request = urllib.request.Request(
+ url = api_iface.build_api_url('v1/oauth2/token'),
+ data = urllib.parse.urlencode(params).encode('utf-8'),
+ method = 'POST',
+ )
+ with as resp:
+ body = json.loads(util.decode_body(resp))
+ if resp.status == 200:
+ token_doc = body
+ else:
+"Stored refresh token rejected. Obtaining new authorization code.")
+ assert resp.status == 400
+ if token_doc is None:
+ redirect_uri = 'http://' + ADDRESS + ':' + str(PORT) + ENDPOINT
+ pkce_pair = _make_pkce_pair()
+ csrf_token = base64.urlsafe_b64encode(os.urandom(24)).decode('ascii')
+ params = {
+ 'redirect_uri': redirect_uri,
+ 'response_type': 'code',
+ 'client_id': client_id,
+ 'scope': 'read offline',
+ 'state': csrf_token,
+ 'code_challenge': pkce_pair.challenge,
+ 'code_challenge_method': 'S256',
+ }
+ user_url = api_iface.build_api_url(
+ endpoint = 'v1/oauth2/authorizationcode',
+ query = urllib.parse.urlencode(params),
+ )
+ channel = queue.Queue()
+ server = http.server.HTTPServer(
+ OauthCodeRequestHandler(channel, csrf_token)
+ )
+ Thread(target = server.serve_forever, daemon = True).start()
+"Attempting to launch a web browser to authorize the application…")
+ if not
+"Failed to launch a browser. Please visit this URL to authorize the application:")
+" {}".format(user_url))
+ try:
+ resp = channel.get(timeout = TIMEOUT.total_seconds())
+ except queue.Empty:
+ resp = None
+ server.shutdown()
+ if resp is None:
+ raise AuthError("Authorization timed out. Please try again.")
+ elif isinstance(resp, Exception):
+ raise resp
+ else:
+ auth_code = resp
+ params = {
+ 'grant_type': 'authorization_code',
+ 'code': auth_code,
+ 'code_verifier': pkce_pair.verifier,
+ 'redirect_uri': redirect_uri,
+ }
+ request = urllib.request.Request(
+ url = api_iface.build_api_url('v1/oauth2/token'),
+ data = urllib.parse.urlencode(params).encode('utf-8'),
+ method = 'POST',
+ )
+ with as resp:
+ assert resp.status == 200
+ token_doc = json.loads(util.decode_body(resp))
+ return token_doc
+class StorageMgrCookieJar(http.cookiejar.CookieJar):
+    """Cookie jar whose cookies are loaded from and saved to a storage manager.
+
+    `storage_mgr' only needs to support `keys()', item lookup and item
+    assignment; the cookies are kept under its 'cookies' key.
+    """
+
+    def __init__(self, storage_mgr):
+        # The base initializer creates the policy, the lock and the initial
+        # `_cookies' mapping that every inherited `CookieJar' method relies
+        # on; without it the jar breaks as soon as a cookie processor uses it.
+        super().__init__()
+        self.storage_mgr = storage_mgr
+
+    def load(self):
+        """Replace the in-memory cookies with those held by the storage manager.
+
+        An empty 'cookies' entry is created first if none exists yet.
+        """
+        if 'cookies' not in self.storage_mgr.keys():
+            self.storage_mgr['cookies'] = {}
+        self._cookies = self.storage_mgr['cookies']
+
+    def save(self):
+        """Write the in-memory cookies back to the storage manager."""
+        self.storage_mgr['cookies'] = self._cookies
+class CookieAuthHandler(urllib.request.HTTPCookieProcessor):
+ # Precede the HTTP error handler
+ handler_order = 750
+ def __init__(self, storage_mgr, bb_root, ui, *args):
+ super().__init__(*args)
+ self.bb_root = bb_root
+ self.storage_mgr = storage_mgr
+ self.ui = ui
+ def log_in(self):
+ import base64
+ import bs4
+ encode = lambda s: base64.b64encode(s.encode('utf-8')).decode('ascii')
+ decode = lambda s: base64.b64decode(s.encode('ascii')).decode('utf-8')
+ # Obfuscate password to prevent accidental discovery
+ if {'username', 'password'} <= self.storage_mgr.keys():
+ username = storage_mgr['username']
+ password = decode(storage_mgr['password'])
+ else:
+ username = self.ui.ask_username()
+ password = self.ui.ask_password()
+ storage_mgr['username'] = username
+ storage_mgr['password'] = encode(password)
+ url = + str(self.bb_root.path)
+ with as resp:
+ soup = bs4.BeautifulSoup(resp, 'lxml')
+ [form] ='#login-form > form[name="login"]')
+ to_keep = lambda elem: (
+ elem['type'] != 'submit'
+ and elem['name'] not in {'user_id', 'password'}
+ )
+ inputs = filter(to_keep, form.find_all('input'))
+ params = dict([
+ ('user_id', username),
+ ('password', password),
+ *((elem['name'], elem['value']) for elem in inputs)
+ ])
+ body = urllib.parse.urlencode(params).encode('ascii')
+ req = urllib.request.Request(
+ util.resolve(form['action'], url),
+ method = form['method'],
+ data = body,
+ )
+ with as resp:
+ def http_error_401(self, request, fp, code, msg, headers):
+"Session cookies missing or expired. Logging in…")
+ self.log_in()
+ return, timeout = request.timeout)
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <>.
-from collections import deque
+from collections import deque, namedtuple
import json
import logging
from pathlib import PurePosixPath
import urllib.request
-API_HOST = ''
-API_PATH = PurePosixPath('/learn/api/public')
+BlackboardRoot = namedtuple('BlackboardRoot', ['host', 'path'])
+BB_ROOT = BlackboardRoot(
+ host = '',
+ path = PurePosixPath('/'),
LOGGER = logging.getLogger('bb-sync-api')
-class StorageManager:
+class Adapter(logging.LoggerAdapter):
+    """Logger adapter that indents a message by an optional `indent' keyword."""
+
+    def process(self, msg, kwargs):
+        """Prefix `msg' with one space per `indent' level.
+
+        `indent' is removed from `kwargs' before they are returned, because
+        the wrapped logger's methods reject keyword arguments they do not
+        know.  A missing `indent' means no indentation.
+        """
+        indent = kwargs.pop('indent', 0)
+        return (' ' * indent + msg, kwargs)
+LOGGER = Adapter(LOGGER, {})
+LOGGER.debug('some message', indent = 1)
+class IndentingFormatter(logging.Formatter):
+    """Formatter that indents output by the record's optional `indent' attribute."""
+
+    def format(self, record):
+        """Return the normally formatted record, prefixed with one space per
+        level of `record.indent' (no prefix if the attribute is absent)."""
+        prefix = ' ' * getattr(record, 'indent', 0)
+        # Delegate the real formatting to the base class; the prefix used to
+        # be added to `None', which raised TypeError for every record.
+        return prefix + super().format(record)
+LOGGER.setFormatter(logging.Formatter('', style = '{'))
+class StorageManager():
def __init__(self, path):
self.path = path
self.patch = {}
return self.cache.get(key)
+ def keys(self):
+ return self.cache.keys() | self.patch.keys()
def __contains__(self, key):
if key in self.patch:
return True
import urllib.request
from common import *
-import oauth
import api
+import auth
import fs
+class Cli:
+    """Console prompts for the user's Blackboard credentials."""
+
+    @staticmethod
+    def ask_username():
+        """Prompt on the console and return the entered username."""
+        prompt = "Blackboard username: "
+        return input(prompt)
+
+    @staticmethod
+    def ask_password():
+        """Prompt for the password via `getpass' and return it."""
+        import getpass
+        prompt = "Blackboard password: "
+        return getpass.getpass(prompt)
cfg_parser = configparser.ConfigParser()
with open('config.ini') as f:
cfg_section = cfg_parser['config']
-with StorageManager(Path('saved_state')) as storage_mgr:
- api_iface = api.ApiInterface(API_HOST, API_PATH, storage_mgr)
+with StorageManager(Path('auth_cache')) as storage_mgr:
+ auth_handler = auth.CookieAuthHandler(
+ storage_mgr,
+ Cli,
+ auth.StorageMgrCookieJar(storage_mgr),
+ )
+ api_iface = api.ApiInterface(BB_ROOT, auth_handler)
url = input("Blackboard URL: ")
params = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
att_id = attachment_doc['id']" Checking attachment {id}: \"{fileName}\"".format(**attachment_doc))
- with api_iface.download_attachment(course_id, child_doc['id'], att_id) as resp:
- with tempfile.NamedTemporaryFile(delete = False) as tmp:
- tmp_path = Path(
- shutil.copyfileobj(resp, tmp)
+ class Result:
+ NoVersions = namedtuple('NoVersions', [])
+ MultipleLatest = namedtuple('MultipleLatest', ['paths'])
+ SingleLatest = namedtuple('SingleLatest', ['version', 'matches'])
+ @staticmethod
+ def to_update(result):
+ return (
+ isinstance(result, Result.SingleLatest) and not result.matches
+ or isinstance(result, Result.NoVersions)
+ )
my_versions = [info for info in versions.keys() if info.bb_id == att_id]
if my_versions:
latest = max(my_versions, key = attrgetter('version'))
- [latest_path] = versions[latest]
- match = filecmp.cmp(str(tmp_path), str(latest_path), shallow = False)
+ latest_paths = versions[latest]
+ if len(latest_paths) == 1:
+ [latest_path] = latest_paths
+ with api_iface.download_attachment(course_id, child_doc['id'], att_id) as resp:
+ with tempfile.NamedTemporaryFile(delete = False) as tmp:
+ tmp_path = Path(
+ shutil.copyfileobj(resp, tmp)
+ matches = filecmp.cmp(str(tmp_path), str(latest_path), shallow = False)
+ result = Result.SingleLatest(latest, matches)
+ else:
+ result = Result.MultipleLatest(latest_paths)
- match = None
+ result = Result.NoVersions()
- if match is True:
- tmp_path.unlink()
- else:
- if match is False:
- new_version =
+ if Result.to_update(result):
+ if isinstance(result, Result.SingleLatest):
+ new_version =" Found new revision ({})".format(new_version.version))
new_version = fs.VersionInfo(att_id, 0)" Destination: {}".format(dest))
fs.write_metadata(dest, new_version)
+ elif isinstance(result, Result.SingleLatest):
+ # versions match
+ tmp_path.unlink()
+ elif isinstance(result, Result.MultipleLatest):
+ LOGGER.error(" Identified multiple latest versions for {id}: {fileName}"
+ .format(**attachment_doc)
+ )
+ LOGGER.error(" Please delete ")
+++ /dev/null
-# Copyright 2019, Jakob Cornell
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# GNU General Public License for more details.
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <>.
-import base64
-from datetime import timedelta
-import http.server
-import os
-from pathlib import PurePosixPath
-import urllib.parse
-import urllib.request
-import common
-import util
-ADDRESS = 'localhost'
-PORT = 1081
-ENDPOINT = '/redirection_endpoint'
-TIMEOUT = timedelta(minutes = 30)
-LOGGER = common.LOGGER.getChild('oauth')
-class AuthCodeRequestHandler(http.server.BaseHTTPRequestHandler):
- LANDING_PATH = '/landing'
- def __init__(self, channel, csrf_token):
- = channel
- self.csrf_token = csrf_token
- self.status = None
- self.message = None
- self.caller_val = None
- def __call__(self, *args):
- # Yep, `socketserver' invokes the handler by calling it, and the superclass's
- # code for dispatching requests to the appropriate methods is in the constructor.
- # If you're confused, it's because it doesn't make any sense.
- super().__init__(*args)
- def do_GET(self):
- url = urllib.parse.urlparse(self.path)
- params = urllib.parse.parse_qs(url.query)
- path = PurePosixPath(url.path)
- if path == PurePosixPath(ENDPOINT):
- REQD_PARAMS = {'code', 'state'}
- if params.keys() == REQD_PARAMS and all(len(params[k]) == 1 for k in REQD_PARAMS):
- [code] = params['code']
- [token] = params['state']
- if token == self.csrf_token:
- self.status = 200
- self.message = "Success! You may close this page."
- self._redirect()
- self.caller_val = code
- else:
- self.status = 403
- self.message = "CSRF token check failed"
- self._redirect()
- else:
- self.status = 400
- self.message = "Auth server redirect missing required parameters"
- self._redirect()
- self.caller_val = AuthError(self.message)
- elif path == PurePosixPath(self.LANDING_PATH):
- self.send_response(self.status)
- self.end_headers()
- self.wfile.write(self.message.encode('ascii'))
- if self.caller_val is not None:
- else:
- self.send_response(404)
- self.end_headers()
- self.wfile.write('Not Found'.encode('ascii'))
- def _redirect(self):
- self.send_response(302)
- self.send_header('Location', self.LANDING_PATH)
- self.end_headers()
- def log_message(self, *_):
- pass
-def _make_pkce_pair():
- from collections import namedtuple
- import hashlib
- verifier = base64.urlsafe_b64encode(os.urandom(32)).strip(b'=')
- hasher = hashlib.sha256()
- hasher.update(verifier)
- challenge = base64.urlsafe_b64encode(hasher.digest()).strip(b'=')
- PkcePair = namedtuple('PkcePair', ['verifier', 'challenge'])
- return PkcePair(verifier.decode('ascii'), challenge.decode('ascii'))
-class AuthError(Exception):
- pass
-class AuthHandler(urllib.request.BaseHandler):
- # Precede the HTTP error handler
- handler_order = 750
- def __init__(self, storage_mgr, api_iface):
- self.storage_mgr = storage_mgr
- self.api_iface = api_iface
- @staticmethod
- def _set_auth_header(request, token_doc):
- request.add_unredirected_header(
- 'Authorization', 'Bearer {}'.format(token_doc['access_token'])
- )
- def _handle(self, request):
- token_doc = self.storage_mgr.get('authInfo')
- if not token_doc:
-"No stored access token. Requesting a new token.")
- token_doc = get_access_token(self.storage_mgr, self.api_iface)
- self.storage_mgr['authInfo'] = token_doc
- self._set_auth_header(request, token_doc)
- return request
- http_request = _handle
- https_request = _handle
- def http_error_401(self, request, fp, code, msg, headers):
-"Access token expired or revoked. Requesting a new token.")
- token_doc = get_access_token(self.storage_mgr, self.api_iface)
- self.storage_mgr['authInfo'] = token_doc
- self._set_auth_header(request, token_doc)
- return, timeout = request.timeout)
-def get_access_token(storage_mgr, api_iface):
- import json
- import queue
- from threading import Thread
- import webbrowser
- client_id = storage_mgr['clientId']
- client_secret = storage_mgr['clientSecret']
- payload = base64.b64encode(
- (client_id + ':' + client_secret).encode('utf-8')
- ).decode('ascii')
- handlers = [
- urllib.request.HTTPSHandler(),
- util.HeaderAddHandler([('Authorization', 'Basic {}'.format(payload))])
- ]
- opener = urllib.request.OpenerDirector()
- for handler in handlers:
- opener.add_handler(handler)
- token_doc = None
- if 'authInfo' in storage_mgr:
- refresh_token = storage_mgr['authInfo']['refresh_token']
- params = {
- 'grant_type': 'refresh_token',
- 'refresh_token': refresh_token,
- }
- request = urllib.request.Request(
- url = api_iface.build_api_url('v1/oauth2/token'),
- data = urllib.parse.urlencode(params).encode('utf-8'),
- method = 'POST',
- )
- with as resp:
- body = json.loads(util.decode_body(resp))
- if resp.status == 200:
- token_doc = body
- else:
-"Stored refresh token rejected. Obtaining new authorization code.")
- assert resp.status == 400
- if token_doc is None:
- redirect_uri = 'http://' + ADDRESS + ':' + str(PORT) + ENDPOINT
- pkce_pair = _make_pkce_pair()
- csrf_token = base64.urlsafe_b64encode(os.urandom(24)).decode('ascii')
- params = {
- 'redirect_uri': redirect_uri,
- 'response_type': 'code',
- 'client_id': client_id,
- 'scope': 'read offline',
- 'state': csrf_token,
- 'code_challenge': pkce_pair.challenge,
- 'code_challenge_method': 'S256',
- }
- user_url = api_iface.build_api_url(
- endpoint = 'v1/oauth2/authorizationcode',
- query = urllib.parse.urlencode(params),
- )
- channel = queue.Queue()
- server = http.server.HTTPServer(
- AuthCodeRequestHandler(channel, csrf_token)
- )
- Thread(target = server.serve_forever, daemon = True).start()
-"Attempting to launch a web browser to authorize the application…")
- if not
-"Failed to launch a browser. Please visit this URL to authorize the application:")
-" {}".format(user_url))
- try:
- resp = channel.get(timeout = TIMEOUT.total_seconds())
- except queue.Empty:
- resp = None
- server.shutdown()
- if resp is None:
- raise AuthError("Authorization timed out. Please try again.")
- elif isinstance(resp, Exception):
- raise resp
- else:
- auth_code = resp
- params = {
- 'grant_type': 'authorization_code',
- 'code': auth_code,
- 'code_verifier': pkce_pair.verifier,
- 'redirect_uri': redirect_uri,
- }
- request = urllib.request.Request(
- url = api_iface.build_api_url('v1/oauth2/token'),
- data = urllib.parse.urlencode(params).encode('utf-8'),
- method = 'POST',
- )
- with as resp:
- assert resp.status == 200
- token_doc = json.loads(util.decode_body(resp))
- return token_doc
import urllib.request
class HeaderAddHandler(urllib.request.BaseHandler):
def __init__(self, headers):
self.headers = headers
http_request = _handle
https_request = _handle
+def resolve(target, curr_url):
+    """Resolve the link `target' found on the page at `curr_url'.
+
+    A `target' that carries its own scheme is returned untouched.  Anything
+    else (host-relative `/path', document-relative `path', scheme-relative
+    `//host/path', or an empty string) is resolved against `curr_url' with
+    `urllib.parse.urljoin', which applies the standard rules for relative
+    references.
+    """
+    if urllib.parse.urlparse(target).scheme:
+        return target
+    return urllib.parse.urljoin(curr_url, target)
def decode_body(resp):
def get_charset():
if 'Content-Type' in