Pythonで「〜美人」を分析する
#hide
import warnings
warnings.simplefilter('ignore')
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png')
import sqlite3
import pandas as pd
conn = sqlite3.connect('data/sakenowaRDB.db')
query = (
'SELECT brands.name as 銘柄名, breweries.name as 蔵元名, areas.name as 都道府県 '
'FROM brands LEFT JOIN breweries ON brands.breweryId = breweries.id '
'LEFT JOIN areas ON breweries.areaId = areas.id '
'WHERE brands.name LIKE "%美人%"'
)
pd.read_sql_query(query, conn)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
query = (
'SELECT brands.name as 銘柄名, breweries.name as 蔵元名, areas.name as 都道府県, ovr.rank, ovr.score '
'FROM brands '
'LEFT JOIN breweries ON brands.breweryId = breweries.id '
'LEFT JOIN areas ON breweries.areaId = areas.id '
'LEFT JOIN overall_ranking as ovr ON brands.id = ovr.brandId '
'WHERE brands.name LIKE "%美人%" AND brands.name NOT LIKE "%福久%" AND brands.name NOT LIKE "%蔵部%" '
'AND ovr.rank IS NOT NULL'
)
pd.read_sql_query(query, conn)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
query = (
'SELECT brands.name as 銘柄名, breweries.name as 蔵元名, areas.name as 都道府県, rank.rank, rank.score '
'FROM brands '
'LEFT JOIN breweries ON brands.breweryId = breweries.id '
'LEFT JOIN areas ON breweries.areaId = areas.id '
'LEFT JOIN rankings as rank ON brands.id = rank.brandId '
'WHERE brands.name LIKE "%美人%" AND brands.name NOT LIKE "%福久%" AND brands.name NOT LIKE "%蔵部%" '
'AND rank.rank IS NOT NULL'
)
pd.read_sql_query(query, conn)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
# 日本語を使うための設定。各自の環境による。
import matplotlib as mpl
mpl.rcParams['font.family'] = 'IPAGothic'
import matplotlib.pyplot as plt
query = (
'SELECT brands.name as 銘柄名, breweries.name as 蔵元名, areas.name as 都道府県, fc.flavor, fc.score '
'FROM brands '
'LEFT JOIN breweries ON brands.breweryId = breweries.id '
'LEFT JOIN areas ON breweries.areaId = areas.id '
'LEFT JOIN flavor_charts as fc ON brands.id = fc.brandId '
'WHERE brands.name LIKE "%美人%" AND brands.name NOT LIKE "%福久%" AND brands.name NOT LIKE "%蔵部%" '
'AND fc.flavor IS NOT NULL'
)
df_fc = pd.read_sql_query(query, conn)
print(df_fc.銘柄名.unique())
fig = plt.figure(figsize=(9,6))
for i, name in enumerate(df_fc.銘柄名.unique()):
ax = fig.add_subplot(2,3,i+1)
df_fc[df_fc.銘柄名 == name].plot.barh(x="flavor",y="score",title=name,ax=ax,legend=None,
xlim=(0., 1.))
fig.subplots_adjust(wspace=0.5, hspace=0.5)
plt.show()
df_fc.groupby(['flavor']).mean().plot.barh(title='平均的美人', legend=None, xlim=(0., 1.))
query = (
'SELECT brands.name as 銘柄名, breweries.name as 蔵元名, areas.name as 都道府県, fc.flavor, fc.score '
'FROM brands '
'LEFT JOIN breweries ON brands.breweryId = breweries.id '
'LEFT JOIN areas ON breweries.areaId = areas.id '
'LEFT JOIN flavor_charts as fc ON brands.id = fc.brandId '
'WHERE brands.name NOT LIKE "%美人%" AND brands.name NOT LIKE "%福久%" AND brands.name NOT LIKE "%蔵部%" '
'AND fc.flavor IS NOT NULL'
)
df_fc = pd.read_sql_query(query, conn)
df_fc.groupby(['flavor']).mean().plot.barh(title='平均的不美人', legend=None, xlim=(0., 1.))
query = (
'SELECT brands.name as 銘柄名, ft.tag '
'FROM brands '
'LEFT JOIN brand_flavor_tags as bft ON brands.id = bft.brandId '
'LEFT JOIN flavor_tags as ft ON bft.tagId = ft.id '
'WHERE brands.name LIKE "%美人%" AND brands.name NOT LIKE "%福久%" AND brands.name NOT LIKE "%蔵部%" '
'AND ft.tag IS NOT NULL'
)
df_tag = pd.read_sql_query(query, conn)
df_tag.銘柄名.unique()
df_tag.groupby(['tag']).count().sort_values(ascending=False, by='銘柄名').query('銘柄名>2')