Set up a mock database in Python for unit testing

My Most Recent Code for Testing

import pandas as pd import pyodbc import unittest import pandas.util.testing as tm from unittest import mock # Function that I want to test def p2ctt_data_frame(): conn = pyodbc.connect( r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};' r'DBQ=My\Path\To\Actual\Database\Access Database.accdb;' ) query = 'select * from P2CTT_2016_Plus0HHs' # I want to make sure this dataframe object is created as intended df = pd.read_sql(query, conn) return df class TestMockDatabase(unittest.TestCase): @mock.patch('directory1.script1.pyodbc.connect') # Mocking connection def test_mock_database(self, mock_access_database): # The dataframe I expect as the output after query is run on the 'mock database' expected_result = pd.DataFrame({ 'POSTAL_CODE':[ 'A0A0A1' ], 'DA_ID':[ 1001001 ], 'GHHDS_DA':[ 100 ] }) # This is the line that I believe is wrong. I want to create a return value that mocks an Access table mock_access_database.connect().return_value = [('POSTAL_CODE', 'DA_ID', 'GHHDS_DA'), ('A0A0A1', 1001001, 100)] result = p2ctt_data_frame() # Run original function on the mock database tm.assert_frame_equal(result, expected_result) if __name__ == "__main__": unittest.main()

I expect that the expected dataframe and the result after running the test using the mock database object is one and the same. This is not the case.

Currently, if I print out the result when trying to mock the database I get:

Empty DataFrame Columns: [] Index: []

Furthermore, I get the following error after the test is run:

AssertionError: DataFrame are different;
DataFrame shape mismatch [left]: (0, 0) [right]: (1, 3)

I would break it up into a few separate tests. A functional test that the desired result will be produced, a test to make sure you can access the database and get expected results, and the final unittest on how to implement it. I would write each test in that order completing the tests first before the actual function. If found that if I can't figure out how to do something I'll try it on a separate REPL or create a git branch to work on it then go back to the main branch. More information can be found here: https://obeythetestinggoat.com/book/praise.harry.html

Comments for each test and the reason behind it is in the code.

import pandas as pd
import pyodbc

def p2ctt_data_frame(query='SELECT * FROM P2CTT_2016_Plus0HHs;'): # set query as default
    with  pyodbc.connect(
        r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=My\Path\To\Actual\Database\Access Database.accdb;'
    ) as conn:  # use with so the connection is closed once completed

        df = pd.read_sql(query, conn)

    return df

Separate test file:

import pandas as pd
import pyodbc
import unittest
from unittest import mock

class TestMockDatabase(unittest.TestCase):

    def test_p2ctt_data_frame_functional_test(self):  # Functional test on data I know will not change
        actual_df = p2ctt_data_frame(query='SELECT * FROM P2CTT_2016_Plus0HHs WHERE DA_ID = 1001001;')

        expected_df = pd.DataFrame({
            'POSTAL_CODE':[
                'A0A0A1'
            ],
            'DA_ID':[
                1001001
            ],
            'GHHDS_DA':[
                100
            ]
        })

        self.assertTrue(actual_df == expected_df)

    def test_access_database_returns_values(self):  # integration test with the database to make sure it works
        with pyodbc.connect(
            r'Driver={Microsoft Access Driver (*.mdb, *.accdb)};'
            r'DBQ=My\Path\To\Actual\Database\Access Database.accdb;'
        ) as conn:
            with conn.cursor() as cursor:
                cursor.execute("SELECT TOP 1 * FROM P2CTT_2016_Plus0HHs WHERE DA_ID = 1001001;")
                result = cursor.fetchone()

        self.assertTrue(len(result) == 3)  # should be 3 columns by 1 row

        # Look for accuracy in the database
        info_from_db = []
        for data in result:  # add to the list all data in the database
            info_from_db.append(data)

        self.assertListEqual(   # All the information matches in the database
            ['A0A0A1', 1001001, 100], info_from_db
        )


    @mock.patch('directory1.script1.pd')  # testing pandas
    @mock.patch('directory1.script1.pyodbc.connect')  # Mocking connection so nothing sent to the outside
    def test_pandas_read_sql_called(self, mock_access_database, mock_pd):  # unittest for the implentation of the function
        p2ctt_data_frame()
        self.assert_True(mock_pd.called)  # Make sure that pandas has been called
        self.assertIn(
            mock.call('select * from P2CTT_2016_Plus0HHs'), mock_pd.mock_calls
        )  # This is to make sure the proper value is sent to pandas. We don't need to unittest that pandas handles the
        # information correctly.

*I was not able to test this so there might be some bugs I need to fix

My Most Recent Code for Testing

Recommended topics

Hot tags