Decluttering my mind into the web ...
date posted: 2021-Mar-15, last edit date: 2021-Mar-15
import pandas as pd
# read sample input
df = pd.read_csv('test_input.csv')
# check
df.head()
user_id | time_install | event_name | time_event | |
---|---|---|---|---|
0 | 4c6065c9466bc68e324e316edfb0227ff7cccc6c | 43892.978137 | purchase | 43892.983808 |
1 | f3049eac4788ffd4482390f8333d7e1adbf4c5a1 | 43896.220370 | signup | 43896.220799 |
2 | f3049eac4788ffd4482390f8333d7e1adbf4c5a1 | 43896.220370 | purchase | 43896.221887 |
3 | 9c1e35e89a374207409ad05da6d69d43f427c5f2 | 43896.788056 | reopen | 43896.107708 |
4 | 723fba1295b9a7c8321bbc433f87629b90660582 | 43892.000046 | reopen | 43893.577488 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50356 entries, 0 to 50355 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 user_id 50356 non-null object 1 time_install 50356 non-null float64 2 event_name 50356 non-null object 3 time_event 50356 non-null float64 dtypes: float64(2), object(2) memory usage: 1.5+ MB
import sqlite3
# connect to a sqlite db (create one if does not exist)
sqlite_con = sqlite3.connect("test_db.sqlite")
sqlite_cur = sqlite_con.cursor()
# insert sample data into sqlite db
# if_exists{‘fail’, ‘replace’, ‘append’}, default ‘fail’
df.to_sql('test1', sqlite_con, if_exists='replace', index=False)
- NULL - INTEGER - REAL - TEXT - BLOB
# check stored table structure
query = "select sql from sqlite_master where name = 'test1';"
sqlite_cur.execute(query)
results = sqlite_cur.fetchall()
print(results[0][0])
CREATE TABLE "test1" ( "user_id" TEXT, "time_install" REAL, "event_name" TEXT, "time_event" REAL )
# close connection
sqlite_con.close()
# connect to a sqlite db (create one if does not exist)
sqlite_con = sqlite3.connect("test_db.sqlite")
sqlite_cur = sqlite_con.cursor()
query = 'select * from test1 limit 10;'
# iterate over sql result
for row in sqlite_cur.execute(query):
print(row)
('4c6065c9466bc68e324e316edfb0227ff7cccc6c', 43892.9781365741, 'purchase', 43892.9838078704) ('f3049eac4788ffd4482390f8333d7e1adbf4c5a1', 43896.2203703704, 'signup', 43896.2207986111) ('f3049eac4788ffd4482390f8333d7e1adbf4c5a1', 43896.2203703704, 'purchase', 43896.2218865741) ('9c1e35e89a374207409ad05da6d69d43f427c5f2', 43896.7880555556, 'reopen', 43896.1077083333) ('723fba1295b9a7c8321bbc433f87629b90660582', 43892.0000462963, 'reopen', 43893.5774884259) ('723fba1295b9a7c8321bbc433f87629b90660582', 43892.0000462963, 'signup', 43892.0025231481) ('97963ab19ae86316d639e3a181a156c6c9dd5be0', 43892.9100231481, 'purchase', 43893.0026157407) ('97963ab19ae86316d639e3a181a156c6c9dd5be0', 43892.9100231481, 'signup', 43892.9137152778) ('97963ab19ae86316d639e3a181a156c6c9dd5be0', 43892.9100231481, 'purchase', 43892.9711689815) ('3f299029526b58c482521f7ad6fa907eca31063b', 43894.1943287037, 'purchase', 43894.1980671296)
# another way of reading data
query = 'select * from test1 limit 10;'
sqlite_cur.execute(query)
results = sqlite_cur.fetchall()
print(results[0][3])
43892.9838078704
# close connection
sqlite_con.close()
# connect to a sqlite db (create one if does not exist)
sqlite_con = sqlite3.connect("test_db.sqlite")
sqlite_cur = sqlite_con.cursor()
query = 'select * from test1 limit 10;'
df = pd.read_sql_query(query, sqlite_con)
df.head()
user_id | time_install | event_name | time_event | |
---|---|---|---|---|
0 | 4c6065c9466bc68e324e316edfb0227ff7cccc6c | 43892.978137 | purchase | 43892.983808 |
1 | f3049eac4788ffd4482390f8333d7e1adbf4c5a1 | 43896.220370 | signup | 43896.220799 |
2 | f3049eac4788ffd4482390f8333d7e1adbf4c5a1 | 43896.220370 | purchase | 43896.221887 |
3 | 9c1e35e89a374207409ad05da6d69d43f427c5f2 | 43896.788056 | reopen | 43896.107708 |
4 | 723fba1295b9a7c8321bbc433f87629b90660582 | 43892.000046 | reopen | 43893.577488 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10 entries, 0 to 9 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 user_id 10 non-null object 1 time_install 10 non-null float64 2 event_name 10 non-null object 3 time_event 10 non-null float64 dtypes: float64(2), object(2) memory usage: 448.0+ bytes
# close connection
sqlite_con.close()
# | Post Title | Date Posted | Last Edit Date |
---|---|---|---|
1 | New tools for data analysis (ipython-sql and Azure Data Studio) | 2021-10-16 | 2021-10-16 |
2 | How to setup a remote jupyter server | 2021-10-13 | 2021-10-13 |
3 | How to setup ZSH | 2021-10-12 | 2021-10-12 |
4 | Concepts in Data Architecture | 2021-05-01 | 2021-10-16 |
5 | Loading data into PostgreSQL reference | 2021-03-19 | 2021-03-19 |
6 | current post -- SQLite <--> Pandas reference | 2021-03-15 | 2021-03-15 |
7 | Creating an automated data mart (in SQL) | 2020-07-16 | 2020-07-16 |
8 | Personal finance management and open banking | 2020-07-12 | 2020-07-12 |
9 | Quick SQL reference | 2020-07-03 | 2020-11-29 |
10 | How to back up Digital Ocean Spaces (S3 compatible storage) | 2020-06-07 | 2020-07-03 |
11 | PostgreSQL initiation | 2020-05-25 | 2020-06-01 |