Skip to content

Commit

Permalink
city current filter outlier for all dimensions
Browse files Browse the repository at this point in the history
  • Loading branch information
Nik Sauer committed Feb 20, 2025
1 parent ba3683f commit a06e118
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
3 changes: 3 additions & 0 deletions code/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class Dimension():
NO2: "no2_ppb",
}

# outlier factor (multiplier applied to the IQR)
IQR_FACTOR = 3

_filter_thresholds = {
PM2_5: (0, 999)
}
Expand Down
22 changes: 19 additions & 3 deletions code/routers/city.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import numpy as np
from geopy.geocoders import Nominatim
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from sqlalchemy.orm import Session
Expand Down Expand Up @@ -65,7 +66,7 @@ async def get_average_measurements_by_city(
q = (
db.query(
Values.dimension,
func.avg(Values.value),
func.array_agg(Values.value),
func.count(Values.id),
func.count(distinct(Station.id)),
)
Expand All @@ -78,14 +79,29 @@ async def get_average_measurements_by_city(
.filter(Values.value != 'nan')
.filter(Measurement.time_measured >= start)
# filter outlier
.filter(or_(Values.dimension != Dimension.PM2_5, and_(LOWER <= Values.value, Values.value <= UPPER)))
#.filter(or_(Values.dimension != Dimension.PM2_5, and_(LOWER <= Values.value, Values.value <= UPPER)))
.group_by(Values.dimension)
)

print(len(q.all()))

station_count = db.query(Station).join(Location).join(City).filter(City.slug == city_slug).count()

# filter outliers per dimension using quartiles (IQR fence)
data = []
for dim, val_list, val_count, s_cnt in q.all():
a = np.array(val_list)
q1 = np.percentile(a, 25)
q3 = np.percentile(a, 75)
iqr = (q3 - q1)

l = q1 - iqr * Dimension.IQR_FACTOR
r = q3 + iqr * Dimension.IQR_FACTOR

b = a[(a >= l) & (a <= r)]

data.append((dim, np.mean(b), val_count, s_cnt))

j = {
"type": "Feature",
"geometry": {
Expand All @@ -104,7 +120,7 @@ async def get_average_measurements_by_city(
"value": val,
"value_count": val_count,
"station_count": s_cnt
} for dim, val, val_count, s_cnt in q.all()],
} for dim, val, val_count, s_cnt in data],
}
}

Expand Down

0 comments on commit a06e118

Please sign in to comment.