33
loading...
This website collects cookies to deliver better user experience
app/__init__.py
file contains all of the configuration necessary to get started.from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from app.util import create_db_uri
db = SQLAlchemy()
def init_app(app):
db.init_app(app)
return app
def create_app():
app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = create_db_uri()
app.config["SQLALCHEMY_ECHO"] = False
return app
from app.models import * # noqa
SQLALCHEMY_DATABASE_URI
tells Flask-SQLAlchemy how to connect to the database. This ultimately depends on the environment variables we saw in Part 2, such as POSTGRES_USER
and POSTGRES_HOST
.SQLALCHEMY_ECHO
value is useful when debugging – when set to true, every SQL statement is logged, so you can see what's happening at every step. We'll see a lot of the global db variable throughout the application, because we'll import it wherever we need to interact with the database. db.Model
and define the columns, indexes, and constraints that belong to that model.from app import db
class ProxyConnection(db.Model):
__tablename__ = "proxyconn"
id = db.Column(db.Integer, primary_key=True)
proxy_url = db.Column(db.String, nullable=False)
username = db.Column(db.String, nullable=False)
password = db.Column(db.String, nullable=False)
allow_parallel = db.Column(
db.Boolean, default=False, server_default="f", nullable=False
)
usage_count = db.Column(db.Integer, default=0, server_default="0")
block_count = db.Column(db.Integer, default=0, server_default="0")
consecutive_fails = db.Column(db.Integer, default=0, server_default="0")
engaged = db.Column(db.Boolean, default=False, server_default="f")
min_wait_time = db.Column(db.Integer, default=0, server_default="0", nullable=False)
random_delay = db.Column(db.Integer, default=0, server_default="0", nullable=False)
last_used = db.Column(db.DateTime, index=True, nullable=True)
consecutive_fails
we take it out of rotation. The project will eventually have many tables, with a model for everything from users to ranking results.manage.py
to create an interactive shell. This shell session is almost the same as an interactive Python session, but within the context of the Flask application.docker exec -it openranktracker_app_1 python manage.py shell
>>> db.create_all()
db
global available, and the create_all function will initialize the entire schema within Postgres. Once that step is complete, you can verify that the tables were created using a psql
session.docker exec -it openranktracker_database_1 psql -U pguser -d openranktracker
psql (11.4 (Debian 11.4-1.pgdg90+1))
Type "help" for help.
openranktracker=# \d
List of relations
Schema | Name | Type | Owner
-------------+------------------+----------+--------
public | domain | table | pguser
public | domain_id_seq | sequence | pguser
public | keyword | table | pguser
public | keyword_id_seq | sequence | pguser
public | proxyconn | table | pguser
public | proxyconn_id_seq | sequence | pguser
(6 rows)
openranktracker=#
class Artist(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
class Album(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
artist_id = db.Column(
db.Integer,
db.ForeignKey("artist.id", ondelete="CASCADE"),
index=True,
nullable=False,
)
artist = db.relationship(
Artist,
backref=db.backref(
"albums", order_by="Album.name", cascade="delete-orphan,all"
),
)
class Song(db.Model):
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
album_id = db.Column(
db.Integer,
db.ForeignKey("album.id", ondelete="CASCADE"),
index=True,
nullable=False,
)
album = db.relationship(
Album,
backref=db.backref(
"songs", order_by="Song.name", cascade="delete-orphan,all"
),
)
result = []
for artist in Artist.query.all():
for album in artist.albums:
for song in album.songs:
result.append((artist.name, album.name, song.name))
SQLALCHEMY_ECHO
to true, we see a surprising number of queries sent to the database for such a simple program.[2021-06-26 17:03:27,602] INFO in log: SELECT artist.id AS artist_id, artist.name AS artist_name
FROM artist
[2021-06-26 17:03:27,607] INFO in log: SELECT album.id AS album_id, album.name AS album_name, album.artist_id AS album_artist_id
FROM album
WHERE %(param_1)s = album.artist_id ORDER BY album.name
[2021-06-26 17:03:27,610] INFO in log: SELECT song.id AS song_id, song.name AS song_name, song.album_id AS song_album_id
FROM song
WHERE %(param_1)s = song.album_id ORDER BY song.name
artist = db.relationship(
Artist,
backref=db.backref(
"albums", order_by="Album.name", lazy="joined", cascade="delete-orphan,all"
),
)
lazy="joined"
instructs SQLAlchemy to always load the related data upfront by issuing a query with a JOIN. This means fewer queries to the database, because the data is already available when accessing the relationship attributes.[2021-06-26 17:21:44,224] INFO in log: SELECT artist.id AS artist_id, artist.name AS artist_name, album_1.id AS album_1_id, album_1.name AS album_1_name, album_1.artist_id AS album_1_artist_id, song_1.id AS song_1_id, song_1.name AS song_1_name, song_1.album_id AS song_1_album_id
FROM artist LEFT OUTER JOIN album AS album_1 ON artist.id = album_1.artist_id LEFT OUTER JOIN song AS song_1 ON album_1.id = song_1.album_id ORDER BY album_1.name, song_1.name
result = Artist.query.join(Album, Song).all()
commit()
called near the end of a route handler.app/__init__.py
to register the Flask blueprint that represents the API. A blueprint is a Flask concept that allows for endpoints sharing a common prefix (i.e. /api
in our case) to be grouped together.def init_app(app):
db.init_app(app)
from app.api import api_blueprint
app.register_blueprint(api_blueprint)
return app
db
object.app/api/__init__.py
is where that happens. We'll start off with just one route, for handling data coming back from the Puppeteer search scraper.from flask import Blueprint
from flask_restful import Api
from app.api.keywords.scan_callback import ScanCallbackView
api_blueprint = Blueprint("main", __name__, url_prefix="/api")
api = Api(api_blueprint)
api.add_resource(ScanCallbackView, "/keywords/<int:keyword_id>/callback/")
<int:keyword_id>
in the URL path is a placeholder for a variable that we expect to receive. The value will be passed along to the handler method, as we'll see in the next snippet of code.from flask import request, abort
from flask import current_app as app
from flask_restful import Resource
from app.services.keyword import handle_scraper_response
class ScanCallbackView(Resource):
def post(self, keyword_id):
data = request.get_json()
app.logger.debug("Keyword scan callback initiated")
if data.get("secret_key") != app.config["SECRET_KEY"]:
app.logger.warning(
"Scan callback did not provide correct secret key: {}".format(
data.get("secret_key")
)
)
abort(403)
handle_scraper_response(keyword_id, data)
return "", 201
request.get_json()
before processing the results within the handle_scraper_response
function.SECRET_KEY
config value, which it will pass to scraper sessions. When a scraper sends its POST request, it will include that same secret key for verification.