20 Jan 2017

Chapter 2: Flask: Diving Deeper in SQLAlchemy

Chapter 2: Diving Deeper with SQLAlchemy

In the last chapter we learnt the basics of using SqlAlchemy with flask in this chapter we will dive deeper with relationships, see what are good and bad way of writing queries and we will learn about things to keep in mind while building a databases.

Creating Databases.

We will be creating a database for blog website. We will use the user model from our last chapter optimize it and creating new tables for Post, Comments, Rating etc.

Current User Model.

class User(db.Model, BaseMixin, ReprMixin):

	email = db.Column(db.String(120), unique=True, nullable=False)
	password = db.Column(db.String(255))
	first_name = db.Column(db.String(40), nullable=False)
	last_name = db.Column(db.String(40))
	profile_picture = db.Column(db.Text()))
	bio = db.Column(db.Text()))
	active = db.Column(db.Boolean(), default=False)
	last_login_at = db.Column(db.DateTime())
	date_of_birth = db.Column(db.Date)
	mobile_number = db.Column(db.String(10), unique=True)
	gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
	marital_status = db.Column(db.Enum('single', 'married', 'divorced',
									   'widowed', name='varchar'))
	education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate', 
						  name='varchar'))
	
	roles = db.relationship('Role', back_populates='users', secondary='user_role')

	@hybrid_property
    def name(self):
        return '{}'.format(self.user_profile.first_name) + 
        (' {}'.format(self.user_profile.last_name) if 
        self.user_profile.last_name else '')

We can have now around 16 columns in our user table which is perfectly fine, but if we have a table with lots of columns that are frequently accessed and lots of column which are used less frequently then it’s a good approach to divide it into 2 tables.

Like:


class User(db.Model, BaseMixin, ReprMixin):

	email = db.Column(db.String(120), unique=True, nullable=False)
	password = db.Column(db.String(255))
	active = db.Column(db.Boolean(), default=False)
	last_login_at = db.Column(db.DateTime())
	mobile_number = db.Column(db.String(10), unique=True)
	
	roles = db.relationship('Role', back_populates='users', secondary='user_role')
	user_profile = db.relationship("UserProfile", back_populates="user", 
									uselist=False, cascade='all, delete-orphan', 
									lazy='select')
	
	@hybrid_property
    def name(self):
        return '{}'.format(self.user_profile.first_name) + 
        ('{}'.format(self.user_profile.last_name) if 
        self.user_profile.last_name else '')


class UserProfile(db.Model, BaseMixin, ReprMixin):
	__repr_fields__ = ['id', 'first_name']

	first_name = db.Column(db.String(40), nullable=False)
	last_name = db.Column(db.String(40))
	profile_picture = db.Column(db.Text())
	bio = db.Column(db.Text())
	date_of_birth = db.Column(db.Date)
	gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
	marital_status = db.Column(db.Enum('single', 'married', 'divorced',
									   'widowed',  name='varchar'))
	education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate', 
								  name='varchar'))
	
	user_id = db.Column(db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), 
						unique=True)
	
	user = db.relationship('User', back_populates="user_profile",
						   single_parent=True)

We have created one more table user_profile which contains less frequently accessed columns. The linking between user and user profile is created by adding a foreign key user_id in user_profile table with a unique constrain which makes it a one to one connection as no two user profile can contain same user_id.

A property named user is added in UserProfile and user_profile in User. Lets see all the options configured in these two tables.

user_profile = db.relationship("UserProfile", back_populates="user", 
								userlist=False, cascade='all,
								delete-orphan', lazy='select')
  1. ‘UserProfile’: The name of the Class with which a relationship has to be established.

  2. back_populates=”user”: This is the complementing property to user property in UserProfile model.

  3. cascade: This refers to how operations performed on a “parent” object should be propagated to items referred to by that relationship more here. In this case delete-orphan indicates that the user_profile object should follow along with its parent (User Model) in all cases, and be deleted once it is no longer associated with that parent.

  4. uselist=False: This means that only one object of the related model will be returned.

  5. lazy=‘select’: This defines the relation loading technique, for instance ‘select’ here means that when ever a user object is created like: user = User.query.first(), user_profile of that object will not be loaded from the database until user_profile property of the user is accessed like user.user_profile. A select query is emitted when the property is accessed. Using subquery instead of select will result in fetching of the user profile within the same query, and user.user_profile will contain the user profile object irrespective if it is accessed or not.

Adding Models


class User(db.Model, BaseMixin, ReprMixin):
    comments = db.relationship('Comment', back_populates='commenter', uselist=True,
                               lazy='dynamic')
    ratings = db.relationship('UserRating', back_populates='rater', uselist=True,
                              lazy='dynamic')
    
class Post(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'slug']

    slug = db.Column(db.String(55), unique=True, nullable=False)
    title = db.Column(db.String(255), unique=True, nullable=False)
    data = db.Column(db.Text, nullable=False)
    author_id = db.Column(db.Integer, db.ForeignKey('user.id'))

    author = db.relationship('User', single_parent=True, foreign_keys=[author_id])
    ratings = db.relationship('UserRating', back_populates='post', uselist=True,
                              lazy='dynamic')
    comments = db.relationship('Comment', back_populates='post', uselist=True,
                               lazy='dynamic')

    @hybrid_property
    def avg_rating(self):
        return self.ratings.with_entities(func.Avg(UserRating.rating))
        .filter(UserRating.post_id == self.id).scalar()

    @hybrid_property
    def total_comments(self):
        return self.comments.with_entities(func.Count(Comment.id))
        .filter(Comment.post_id == self.id).scalar()

	@hybrid_property
	def last_5_comments(self):
		return self.comments.filter(Comment.post_id == self.id)
		.order_by(-Comment.created_on).limit(5).all()


class Comment(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'commented_by']

    data = db.Column(db.Text, nullable=False)
    is_moderated = db.Column(db.Boolean(), default=False)

    post_id = db.Column(db.Integer, db.ForeignKey('post.id'))
    commented_by = db.Column(db.Integer, db.ForeignKey('user.id'))

    post = db.relationship('Post', foreign_keys=[post_id],
						     back_populates='comments')
    commenter = db.relationship('User', foreign_keys=[commented_by], 
							    back_populates='comments')


class UserRating(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['rating', 'post_id', 'rated_by']

    rating = db.Column(db.SmallInteger, nullable=False)

    rated_by = db.Column(db.Integer, db.ForeignKey('user.id'))
    post_id = db.Column(db.Integer, db.ForeignKey('post.id'))

    post = db.relationship('Post', back_populates='ratings', 
		    foreign_keys=[post_id])
    rater = db.relationship('User', foreign_keys=[rated_by],
		     back_populates='ratings')

We have Post Model which contains a slug which can uniquely identify the post, a title, data which contains what the user has written, user_id which contains the user id of the user who has written the post. The data is of type Text, we are using Text instead of String(varchar) as their is no limit to the characters you can store with Text and the data is stored off the table with the table just having a pointer to the location of the actual storage. VARCHAR is stored inline with the table. Only downside is we can’t index Text column which we were not going to anyways.

Comment Model has data which contains the comment that user has posted, user_id, post_id which contains the post id of the post for which the comment has been written by the user.

UserRating contains the rating given, the user_id, and post_id.

Hybrid Properties

We have 3 hybrid properties in Post model

  1. avg_rating which attaches a property which will give us the average rating of the post.
  2. total_comments wil give us the number of comments on the post.
  3. last_5_comments will give us last 5 comments on the post.

Hybrid Properties attaches a property to a model, which we can not query on. We can’t do this Post.query.filter(Post.avg_rating>3.5).all() but defining an expression for hybrid property will allow us to query on it.

""" Finding all the post with average rating above 3.5"""
from sqlalchemy import and_, func

posts = Post.query.join(UserRating, and_(UserRating.post_id == Post.id))
			.group_by(UserRating.post_id).having(func.Avg(UserRating.rating)>3.5).all()

""" Wouldn't it be better if we can do something like"""
posts = Post.query.filter(Post.avg_rating>3.5).all()

Let’s see how we can do this by creating an expression for our hybrid properties to query directly.

class Post(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'slug']
    
    @hybrid_property
    def avg_rating(self):
        return self.ratings.with_entities(func.Avg(UserRating.rating))
			       .filter(UserRating.post_id == self.id).scalar()

    @avg_rating.expression
    def avg_rating(cls):
        return select([func.Avg(UserRating.rating)])
		        .where(cls.id == UserRating.post_id).as_scalar()

Note: Hybrid properties and expressions will help us a lot when we will write Restful Api’s in chapter 5.

Adjacency List

What happens if a user wants to comment on a comment of another user in some post. How can we handle that?

Using adjacency list.

Comment 2-->Comment 1: Yes! Very nice post!
Comment 1->Post 1: Nice post!

Example: A user has commented Nice post! on a post, another user commented on that comment with Yes! Very nice post!

class Comment(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'commented_by']

	parent_comment_id = db.Column(db.Integer, db.ForeignKey('comment.id'))
	parent_comment = db.relationship('Comment', remote_side='Comment.id')
	children_comment = db.relationship('Comment', 
										remote_side='Comment.parent_comment_id') 

We will keep adding models as per our requirements but for now these models will suffice. Lets optimize our models for faster querying and put some constraints.

Constraints and Indexes

We will add constraints and Indexes to our database.

  1. We will index all the primary keys and foreign keys.
  2. We will index all the columns which will be searched frequently like title in post.
  3. We will put constrains like:
    1. We don’t want a user to rate a post multiple time, for this we will add UniqueConstraint to our UserRate Model.
    2. Fields like email, mobile_number in user are already marked Unique or slug in post are already marked Unique.
    3. In UserRole Model we don’t want a single user alloted same role again and again for that we will add Unique constraint.

Final Code


import os
import re

from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy import func, select, UniqueConstraint, Index

basedir = os.path.abspath(os.path.dirname(__file__))
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///{}'.format(os.path.join(basedir, 'test.db'))
db = SQLAlchemy(app)
db.create_all()


def to_underscore(name):

    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()


class BaseMixin(object):

    @declared_attr
    def __tablename__(self):
        return to_underscore(self.__name__)

    id = db.Column(db.Integer, primary_key=True, index=True)
    created_on = db.Column(db.TIMESTAMP, default=db.func.current_timestamp())
    updated_on = db.Column(db.TIMESTAMP, onupdate=db.func.current_timestamp())


class ReprMixin(object):

    __repr_fields__ = ['id', 'name']

    def __repr__(self):
        fields = {f: getattr(self, f, '<BLANK>') for f in self.__repr_fields__}
        pattern = ['{0}=}'.format(f) for f in self.__repr_fields__]
        pattern = ' '.join(pattern)
        pattern = pattern.format(**fields)
        return '<{} {}>'.format(self.__class__.__name__, pattern)


class User(db.Model, BaseMixin, ReprMixin):
    email = db.Column(db.String(120), unique=True, nullable=False, index=True)
    password = db.Column(db.String(255))
    active = db.Column(db.Boolean(), default=False)
    last_login_at = db.Column(db.DateTime())
    mobile_number = db.Column(db.String(10), unique=True, index=True)

    roles = db.relationship('Role', back_populates='users', secondary='user_role')
    user_profile = db.relationship("UserProfile", back_populates="user",
                                   uselist=False, cascade='all, delete-orphan',
                                   lazy='select')
    comments = db.relationship('Comment', back_populates='commenter', uselist=True,
                               lazy='dynamic')
    ratings = db.relationship('UserRating', back_populates='rater', uselist=True,
                              lazy='dynamic')

@hybrid_property
    def name(self):
        return '{}'.format(self.user_profile.first_name) +
         ('{}'.format(self.user_profile.last_name) if 
         self.user_profile.last_name else '')


class UserProfile(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'first_name']

    first_name = db.Column(db.String(40), nullable=False)
    last_name = db.Column(db.String(40))
    profile_picture = db.Column(db.Text())
    bio = db.Column(db.Text())
    date_of_birth = db.Column(db.Date)
    gender = db.Column(db.Enum('male', 'female', 'other', name='varchar'))
    marital_status = db.Column(db.Enum('single', 'married', 'divorced', 'widowed', name='varchar'))
    education = db.Column(db.Enum('undergraduate', 'graduate', 'post_graduate', name='varchar'))

    user_id = db.Column(db.Integer, db.ForeignKey('user.id', ondelete='CASCADE'), unique=True, index=True)

    user = db.relationship('User', back_populates="user_profile", single_parent=True)


class Role(db.Model, BaseMixin, ReprMixin):

    name = db.Column(db.String(80), unique=True)
    description = db.Column(db.Text, unique=True)
    
    users = db.relationship('User', secondary='user_role', back_populates='roles')


class UserRole(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['user_id', 'role_id']

    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'))

    role = db.relationship('Role', foreign_keys=[role_id])
    user = db.relationship('User', foreign_keys=[user_id])

    UniqueConstraint(role_id, user_id, 'role_user_un')


class Post(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'slug']

    slug = db.Column(db.String(55), unique=True, nullable=False, index=True)
    title = db.Column(db.String(255), nullable=False, index=True)
    data = db.Column(db.Text, nullable=False)
    author_id = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)

    author = db.relationship('User', single_parent=True, foreign_keys=[author_id])
    ratings = db.relationship('UserRating', back_populates='post', uselist=True,
                              lazy='dynamic')
    comments = db.relationship('Comment', back_populates='post', uselist=True,
                               lazy='dynamic')

    @hybrid_property
    def avg_rating(self):
        return self.ratings.with_entities(func.Avg(UserRating.rating)).filter(UserRating.post_id == self.id).scalar()

    @hybrid_property
    def total_comments(self):
        return self.comments.with_entities(func.Count(Comment.id)).filter(Comment.post_id == self.id).scalar()

    @avg_rating.expression
    def avg_rating(cls):
        return select([func.Avg(UserRating.rating)]).where(cls.id == UserRating.post_id).as_scalar()


class Comment(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['id', 'commented_by']

    data = db.Column(db.Text, nullable=False)
    is_moderated = db.Column(db.Boolean(), default=False)

    post_id = db.Column(db.Integer, db.ForeignKey('post.id'), index=True)
    commented_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
    parent_comment_id = db.Column(db.Integer, db.ForeignKey('comment.id'), index=True)

    post = db.relationship('Post', foreign_keys=[post_id], back_populates='comments')
    commenter = db.relationship('User', foreign_keys=[commented_by], back_populates='comments')
    parent_comment = db.relationship('Comment', remote_side='Comment.id')
    children_comment = db.relationship('Comment', remote_side='Comment.parent_comment_id') 


class UserRating(db.Model, BaseMixin, ReprMixin):
    __repr_fields__ = ['rating', 'post_id', 'rated_by']

    rating = db.Column(db.SmallInteger, nullable=False)

    rated_by = db.Column(db.Integer, db.ForeignKey('user.id'), index=True)
    post_id = db.Column(db.Integer, db.ForeignKey('post.id'), index=True)

    post = db.relationship('Post', back_populates='ratings', foreign_keys=[post_id])
    rater = db.relationship('User', foreign_keys=[rated_by], back_populates='ratings')

    UniqueConstraint(rated_by, post_id, 'user_post_un')


@app.route('/')
def hello_world():
    return 'Hello, World!'

Tags:
Stats:
0 comments