In the last chapter we learnt the basics of using SqlAlchemy with flask in this chapter we will dive deeper with relationships, see what are good and bad way of writing queries and we will learn about things to keep in mind while building a databases.
We will be creating a database for blog website. We will use the user model from our last chapter optimize it and creating new tables for Post, Comments, Rating etc.
class User(db.Model, BaseMixin, ReprMixin):
email = db.Column(db.String(120), unique=True, nullable=False)
password = db.Column(db.String(255))
first_name = db.Column(db.String(40), nullable=False)
last_name = db.Column(db.String(40))
profile_picture = db.Column(db.Text()))
bio = db.Column(db.Text()))
active = db.Column(db.Boolean(), default=False)
last_login_at = db.Column(db.DateTime())
date_of_birth = db.Column(db.Date)
mobile_number = db.Column(db.String(10), unique=True)
gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
marital_status = db.Column(db.Enum('single', 'married', 'divorced',
'widowed', name='varchar'))
education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate',
name='varchar'))
roles = db.relationship('Role', back_populates='users', secondary='user_role')
@hybrid_property
def name(self):
return '{}'.format(self.user_profile.first_name) +
(' {}'.format(self.user_profile.last_name) if
self.user_profile.last_name else '')
We can have now around 16 columns in our user table which is perfectly fine, but if we have a table with lots of columns that are frequently accessed and lots of column which are used less frequently then it’s a good approach to divide it into 2 tables.
class User(db.Model, BaseMixin, ReprMixin):
email = db.Column(db.String(120), unique=True, nullable=False)
password = db.Column(db.String(255))
active = db.Column(db.Boolean(), default=False)
last_login_at = db.Column(db.DateTime())
mobile_number = db.Column(db.String(10), unique=True)
roles = db.relationship('Role', back_populates='users', secondary='user_role')
user_profile = db.relationship("UserProfile", back_populates="user",
uselist=False, cascade='all, delete-orphan',
lazy='select')
@hybrid_property
def name(self):
return '{}'.format(self.user_profile.first_name) +
('{}'.format(self.user_profile.last_name) if
self.user_profile.last_name else '')
class UserProfile(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'first_name']
first_name = db.Column(db.String(40), nullable=False)
last_name = db.Column(db.String(40))
profile_picture = db.Column(db.Text())
bio = db.Column(db.Text())
date_of_birth = db.Column(db.Date)
gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
marital_status = db.Column(db.Enum('single', 'married', 'divorced',
'widowed', name='varchar'))
education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate',
name='varchar'))
user_id = db.Column(db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'),
unique=True)
user = db.relationship('User', back_populates="user_profile",
single_parent=True)
We have created one more table user_profile which contains less frequently accessed columns. The linking between user and user profile is created by adding a foreign key user_id in user_profile table with a unique constrain which makes it a one to one connection as no two user profile can contain same user_id.
A property named user is added in UserProfile and user_profile in User. Lets see all the options configured in these two tables.
user_profile = db.relationship("UserProfile", back_populates="user",
userlist=False, cascade='all,
delete-orphan', lazy='select')
‘UserProfile’: The name of the Class with which a relationship has to be established.
back_populates=”user”: This is the complementing property to user property in UserProfile model.
cascade: This refers to how operations performed on a “parent” object should be propagated to items referred to by that relationship more here. In this case delete-orphan indicates that the user_profile object should follow along with its parent (User Model) in all cases, and be deleted once it is no longer associated with that parent.
uselist=False: This means that only one object of the related model will be returned.
lazy=‘select’: This defines the relation loading technique, for instance ‘select’ here means that when ever a user object is created like: user = User.query.first()
, user_profile of that object will not be loaded from the database until user_profile property of the user is accessed like user.user_profile
. A select query is emitted when the property is accessed. Using subquery
instead of select
will result in fetching of the user profile within the same query, and user.user_profile
will contain the user profile object irrespective if it is accessed or not.
class User(db.Model, BaseMixin, ReprMixin):
comments = db.relationship('Comment', back_populates='commenter', uselist=True,
lazy='dynamic')
ratings = db.relationship('UserRating', back_populates='rater', uselist=True,
lazy='dynamic')
class Post(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'slug']
slug = db.Column(db.String(55), unique=True, nullable=False)
title = db.Column(db.String(255), unique=True, nullable=False)
data = db.Column(db.Text, nullable=False)
author_id = db.Column(db.Integer, db.ForeignKey('user.id'))
author = db.relationship('User', single_parent=True, foreign_keys=[author_id])
ratings = db.relationship('UserRating', back_populates='post', uselist=True,
lazy='dynamic')
comments = db.relationship('Comment', back_populates='post', uselist=True,
lazy='dynamic')
@hybrid_property
def avg_rating(self):
return self.ratings.with_entities(func.Avg(UserRating.rating))
.filter(UserRating.post_id == self.id).scalar()
@hybrid_property
def total_comments(self):
return self.comments.with_entities(func.Count(Comment.id))
.filter(Comment.post_id == self.id).scalar()
@hybrid_property
def last_5_comments(self):
return self.comments.filter(Comment.post_id == self.id)
.order_by(-Comment.created_on).limit(5).all()
class Comment(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'commented_by']
data = db.Column(db.Text, nullable=False)
is_moderated = db.Column(db.Boolean(), default=False)
post_id = db.Column(db.Integer, db.ForeignKey('post.id'))
commented_by = db.Column(db.Integer, db.ForeignKey('user.id'))
post = db.relationship('Post', foreign_keys=[post_id],
back_populates='comments')
commenter = db.relationship('User', foreign_keys=[commented_by],
back_populates='comments')
class UserRating(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['rating', 'post_id', 'rated_by']
rating = db.Column(db.SmallInteger, nullable=False)
rated_by = db.Column(db.Integer, db.ForeignKey('user.id'))
post_id = db.Column(db.Integer, db.ForeignKey('post.id'))
post = db.relationship('Post', back_populates='ratings',
foreign_keys=[post_id])
rater = db.relationship('User', foreign_keys=[rated_by],
back_populates='ratings')
We have Post Model which contains a slug
which can uniquely identify the post, a title
, data
which contains what the user has written, user_id
which contains the user id of the user who has written the post. The data is of type Text, we are using Text instead of String(varchar) as their is no limit to the characters you can store with Text and the data is stored off the table with the table just having a pointer to the location of the actual storage. VARCHAR
is stored inline with the table. Only downside is we can’t index Text
column which we were not going to anyways.
Comment Model has data which contains the comment that user has posted, user_id
, post_id
which contains the post id of the post for which the comment has been written by the user.
UserRating contains the rating
given, the user_id
, and post_id
.
We have 3 hybrid properties in Post model
avg_rating
which attaches a property which will give us the average rating of the post.total_comments
wil give us the number of comments on the post.last_5_comments
will give us last 5 comments on the post.Hybrid Properties attaches a property to a model, which we can not query on. We can’t do this Post.query.filter(Post.avg_rating>3.5).all()
but defining an expression for hybrid property will allow us to query on it.
""" Finding all the post with average rating above 3.5"""
from sqlalchemy import and_, func
posts = Post.query.join(UserRating, and_(UserRating.post_id == Post.id))
.group_by(UserRating.post_id).having(func.Avg(UserRating.rating)>3.5).all()
""" Wouldn't it be better if we can do something like"""
posts = Post.query.filter(Post.avg_rating>3.5).all()
Let’s see how we can do this by creating an expression for our hybrid properties to query directly.
class Post(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'slug']
@hybrid_property
def avg_rating(self):
return self.ratings.with_entities(func.Avg(UserRating.rating))
.filter(UserRating.post_id == self.id).scalar()
@avg_rating.expression
def avg_rating(cls):
return select([func.Avg(UserRating.rating)])
.where(cls.id == UserRating.post_id).as_scalar()
Note: Hybrid properties and expressions will help us a lot when we will write Restful Api’s in chapter 5.
What happens if a user wants to comment on a comment of another user in some post. How can we handle that?
Using adjacency list.
Comment 2-->Comment 1: Yes! Very nice post!
Comment 1->Post 1: Nice post!
Example: A user has commented Nice post! on a post, another user commented on that comment with Yes! Very nice post!
class Comment(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'commented_by']
parent_comment_id = db.Column(db.Integer, db.ForeignKey('comment.id'))
parent_comment = db.relationship('Comment', remote_side='Comment.id')
children_comment = db.relationship('Comment',
remote_side='Comment.parent_comment_id')
We will keep adding models as per our requirements but for now these models will suffice. Lets optimize our models for faster querying and put some constraints.
We will add constraints and Indexes to our database.
primary keys
and foreign keys
.title
in post
.UniqueConstraint
to our UserRate Model.email, mobile_number
in user
are already marked Unique
or slug
in post
are already marked Unique
.UserRole Model
we don’t want a single user alloted same role again and again for that we will add Unique
constraint.
import os
import re
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy import func, select, UniqueConstraint, Index
basedir = os.path.abspath(os.path.dirname(__file__))
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///{}'.format(os.path.join(basedir, 'test.db'))
db = SQLAlchemy(app)
db.create_all()
def to_underscore(name):
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
class BaseMixin(object):
@declared_attr
def __tablename__(self):
return to_underscore(self.__name__)
id = db.Column(db.Integer, primary_key=True, index=True)
created_on = db.Column(db.TIMESTAMP, default=db.func.current_timestamp())
updated_on = db.Column(db.TIMESTAMP, onupdate=db.func.current_timestamp())
class ReprMixin(object):
__repr_fields__ = ['id', 'name']
def __repr__(self):
fields = {f: getattr(self, f, '<BLANK>') for f in self.__repr_fields__}
pattern = ['{0}=}'.format(f) for f in self.__repr_fields__]
pattern = ' '.join(pattern)
pattern = pattern.format(**fields)
return '<{} {}>'.format(self.__class__.__name__, pattern)
class User(db.Model, BaseMixin, ReprMixin):
email = db.Column(db.String(120), unique=True, nullable=False, index=True)
password = db.Column(db.String(255))
active = db.Column(db.Boolean(), default=False)
last_login_at = db.Column(db.DateTime())
mobile_number = db.Column(db.String(10), unique=True, index=True)
roles = db.relationship('Role', back_populates='users', secondary='user_role')
user_profile = db.relationship("UserProfile", back_populates="user",
uselist=False, cascade='all, delete-orphan',
lazy='select')
comments = db.relationship('Comment', back_populates='commenter', uselist=True,
lazy='dynamic')
ratings = db.relationship('UserRating', back_populates='rater', uselist=True,
lazy='dynamic')
@hybrid_property
def name(self):
return '{}'.format(self.user_profile.first_name) +
('{}'.format(self.user_profile.last_name) if
self.user_profile.last_name else '')
class UserProfile(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'first_name']
first_name = db.Column(db.String(40), nullable=False)
last_name = db.Column(db.String(40))
profile_picture = db.Column(db.Text())
bio = db.Column(db.Text())
date_of_birth = db.Column(db.Date)
gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
marital_status = db.Column(db.Enum('single', 'married', 'divorced', 'widowed', name='varchar'))
education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate', name='varchar'))
user_id = db.Column(db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), unique=True, index=True)
user = db.relationship('User', back_populates="user_profile", single_parent=True)
class Role(db.Model, BaseMixin, ReprMixin):
name = db.Column(db.String(80), unique=True)
description = db.Column(db.Text, unique=True)
users = db.relationship('User', secondary='user_role', back_populates='roles')
class UserRole(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['user_id', 'role_id']
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
role_id = db.Column(db.Integer, db.ForeignKey('role.id'))
role = db.relationship('Role', foreign_keys=[role_id])
user = db.relationship('User', foreign_keys=[user_id])
UniqueConstraint(role_id, user_id, 'role_user_un')
class Post(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'slug']
slug = db.Column(db.String(55), unique=True, nullable=False, index=True)
title = db.Column(db.String(255), nullable=False, index=True)
data = db.Column(db.Text, nullable=False)
author_id = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
author = db.relationship('User', single_parent=True, foreign_keys=[author_id])
ratings = db.relationship('UserRating', back_populates='post', uselist=True,
lazy='dynamic')
comments = db.relationship('Comment', back_populates='post', uselist=True,
lazy='dynamic')
@hybrid_property
def avg_rating(self):
return self.ratings.with_entities(func.Avg(UserRating.rating)).filter(UserRating.post_id == self.id).scalar()
@hybrid_property
def total_comments(self):
return self.comments.with_entities(func.Count(Comment.id)).filter(Comment.post_id == self.id).scalar()
@avg_rating.expression
def avg_rating(cls):
return select([func.Avg(UserRating.rating)]).where(cls.id == UserRating.post_id).as_scalar()
class Comment(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['id', 'commented_by']
data = db.Column(db.Text, nullable=False)
is_moderated = db.Column(db.Boolean(), default=False)
post_id = db.Column(db.Integer, db.ForeignKey('post.id'), index=True)
commented_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
parent_comment_id = db.Column(db.Integer, db.ForeignKey('comment.id'), index=True)
post = db.relationship('Post', foreign_keys=[post_id], back_populates='comments')
commenter = db.relationship('User', foreign_keys=[commented_by], back_populates='comments')
parent_comment = db.relationship('Comment', remote_side='Comment.id')
children_comment = db.relationship('Comment', remote_side='Comment.parent_comment_id')
class UserRating(db.Model, BaseMixin, ReprMixin):
__repr_fields__ = ['rating', 'post_id', 'rated_by']
rating = db.Column(db.SmallInteger, nullable=False)
rated_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
post_id = db.Column(db.Integer, db.ForeignKey('post.id'), index=True)
post = db.relationship('Post', back_populates='ratings', foreign_keys=[post_id])
rater = db.relationship('User', foreign_keys=[rated_by], back_populates='ratings')
UniqueConstraint(rated_by, post_id, 'user_post_un')
@app.route('/')
def hello_world():
return 'Hello, World!'