#!/usr/bin/env python
import pdb
import logging
import yaml
import datetime
log = logging.getLogger(__name__)
class OfficerFeature():
    """Base class for one officer-level feature built from a SQL query.

    The constructor only initialises metadata; ``query`` starts as ``None``
    (NOTE(review): presumably concrete features assign it after calling
    this constructor -- confirm against the subclasses).

    Keyword Args:
        as_of_date: stored as ``self.as_of_date`` when supplied.
        table_name: stored as ``self.table_name`` when supplied; names the
            table in the ``features`` schema that ``build_and_insert``
            zero-fills.

    Both keyword arguments are optional so a feature can be instantiated
    without any arguments. An argument that is not supplied leaves the
    corresponding attribute unset.
    """

    def __init__(self, **kwargs):
        self.description = ""
        self.description_long = ""
        self.is_label = False
        self.query = None
        # The feature (and its column) is named after the concrete class.
        self.feature_name = self.__class__.__name__
        self.is_categorical = False
        self.type_of_imputation = None
        self.set_null_counts_to_zero = False

        # Allow instantiation without kwargs. Each argument is copied on its
        # own so that a missing as_of_date no longer discards a table_name
        # that was supplied.
        incomplete = False
        for name in ("as_of_date", "table_name"):
            try:
                setattr(self, name, kwargs[name])
            except (KeyError, AttributeError):
                incomplete = True
        if incomplete:
            log.info("WARNING: no fake today set for officer feature: {}".format(self.feature_name))

    def build_and_insert(self, engine):
        """Execute the feature query, then optionally replace NULLs with 0.

        Args:
            engine: object exposing ``execute(sql)``; it is called once with
                ``self.query`` and, when ``set_null_counts_to_zero`` is
                true, once more with an UPDATE statement.
        """
        engine.execute(self.query)
        if self.set_null_counts_to_zero:
            # option to set all nulls to zeros (for use with counts)
            update_query = ("UPDATE features.{0} SET {1} = 0 "
                            "WHERE {1} IS null; ".format(self.table_name, self.feature_name))
            engine.execute(update_query)
class CategoricalOfficerFeature(OfficerFeature):
    """Officer feature that is written as one column per category.

    ``self.categories`` is read but never assigned in this class, so it must
    already exist when the constructor runs (NOTE(review): presumably the
    concrete feature sets it before calling this ``__init__`` -- confirm).
    It is iterated to obtain lookup codes and indexed with each code to
    obtain the label used in the column name.
    """

    # Placeholder substrings a template query embeds; build_and_insert swaps
    # them for the concrete column name and lookup code.
    COLUMN = "columnwildcardstring"
    LOOKUPCODE = "lookupcodewildcardstring"

    def __init__(self, **kwargs):
        OfficerFeature.__init__(self, **kwargs)
        self.type_of_imputation = "zero"
        # One column per category: <feature_name>_<label with underscores>.
        self.feature_column_names = [
            self.feature_name + "_" + self.categories[code].replace(" ", "_")
            for code in self.categories
        ]

    def build_and_insert(self, engine):
        """Run the templated query once per category.

        For every lookup code the COLUMN and LOOKUPCODE placeholders in
        ``self.query`` are substituted and the result is executed; when
        ``set_null_counts_to_zero`` is true the freshly written column is
        then zero-filled.

        Args:
            engine: object exposing ``execute(sql)``.
        """
        for code in self.categories:
            label = self.categories[code].replace(" ", "_")
            column = self.feature_name + "_" + label

            filled = self.query.replace(self.COLUMN, column)
            filled = filled.replace(self.LOOKUPCODE, str(code))
            engine.execute(filled)

            if not self.set_null_counts_to_zero:
                continue
            # option to set all nulls to zeros (for use with counts)
            zero_fill = ("UPDATE features.{0} SET {1} = 0 "
                         "WHERE {1} IS null; ".format(self.table_name, column))
            engine.execute(zero_fill)
class TimeGatedCategoricalOfficerFeature(OfficerFeature):
    """Officer feature written as one column per (lookback duration, category).

    ``self.categories`` is read but never assigned in this class, so it must
    already exist when the constructor runs (NOTE(review): presumably the
    concrete feature sets it before calling this ``__init__`` -- confirm).

    Keyword Args:
        lookback_durations: iterable of duration strings; required.
        Remaining keyword arguments are forwarded to ``OfficerFeature``.

    Raises:
        KeyError: if ``lookback_durations`` is not supplied.
    """

    # Placeholder substrings a template query embeds; build_and_insert swaps
    # them for the concrete duration, column name and lookup code.
    DURATION = "durationwildcardstring"
    COLUMN = "columnwildcardstring"
    LOOKUPCODE = "lookupcodewildcardstring"

    def __init__(self, **kwargs):
        OfficerFeature.__init__(self, **kwargs)
        self.lookback_durations = kwargs["lookback_durations"]
        self.type_of_imputation = "zero"
        # Column names in the same order build_and_insert walks them:
        # durations outermost, categories innermost.
        self.feature_column_names = []
        for duration in self.lookback_durations:
            for key in self.categories:
                self.feature_column_names.append(self._column_name(key, duration))

    def _column_name(self, key, duration):
        """Return <feature_name>_<category label>_<duration>, spaces as underscores."""
        return (self.feature_name
                + "_" + self.categories[key].replace(" ", "_")
                + "_" + duration.replace(" ", "_"))

    def build_and_insert(self, engine):
        """Run the templated query once per (duration, category) pair.

        The DURATION, COLUMN and LOOKUPCODE placeholders in ``self.query``
        are substituted and the result is executed; when
        ``set_null_counts_to_zero`` is true the freshly written column is
        then zero-filled.

        Args:
            engine: object exposing ``execute(sql)``.
        """
        for duration in self.lookback_durations:
            for key in self.categories:
                column = self._column_name(key, duration)

                this_query = self.query
                this_query = this_query.replace(self.DURATION, duration)
                this_query = this_query.replace(self.COLUMN, column)
                this_query = this_query.replace(self.LOOKUPCODE, str(key))
                engine.execute(this_query)

                if self.set_null_counts_to_zero:
                    # option to set all nulls to zeros (for use with counts)
                    update_query = ("UPDATE features.{0} SET {1} = 0 "
                                    "WHERE {1} IS null; ".format(self.table_name, column))
                    engine.execute(update_query)
class TimeGatedOfficerFeature(OfficerFeature):
    """Officer feature written as one column per lookback duration.

    Keyword Args:
        lookback_durations: iterable of duration strings. When it is absent
            the instance is still created, but ``lookback_durations`` and
            ``feature_column_names`` are left unset and
            ``type_of_imputation`` keeps the value set by ``OfficerFeature``.
        Remaining keyword arguments are forwarded to ``OfficerFeature``.
    """

    # Placeholder substrings a template query embeds; build_and_insert swaps
    # them for the concrete duration and column name.
    DURATION = "durationwildcardstring"
    COLUMN = "columnwildcardstring"

    def __init__(self, **kwargs):
        OfficerFeature.__init__(self, **kwargs)
        # allow instantiation without kwargs
        try:
            durations = kwargs["lookback_durations"]
            self.lookback_durations = durations
            self.type_of_imputation = "zero"
            # <feature_name>_<duration with spaces turned into underscores>
            prefix = self.feature_name + "_"
            self.feature_column_names = [prefix + d.replace(" ", "_") for d in durations]
        except KeyError:
            pass

    def build_and_insert(self, engine):
        """Run the templated query once per lookback duration.

        Durations and column names are paired positionally; the DURATION and
        COLUMN placeholders in ``self.query`` are substituted and the result
        is executed. When ``set_null_counts_to_zero`` is true the freshly
        written column is then zero-filled.

        Args:
            engine: object exposing ``execute(sql)``.
        """
        pairs = zip(self.lookback_durations, self.feature_column_names)
        for duration, column in pairs:
            filled = self.query.replace(self.DURATION, duration)
            filled = filled.replace(self.COLUMN, column)
            engine.execute(filled)

            if not self.set_null_counts_to_zero:
                continue
            # option to set all nulls to zeros (for use with counts)
            zero_fill = ("UPDATE features.{0} SET {1} = 0 "
                         "WHERE {1} IS null; ".format(self.table_name, column))
            engine.execute(zero_fill)
class DispatchFeature():
    """Base class for one dispatch-level feature built from a SQL query.

    Keyword Args:
        from_date: stored as ``self.from_date`` when supplied.
        to_date: stored as ``self.to_date`` when supplied.
        table_name: stored as ``self.table_name`` when supplied; it is not
            used by this class's own ``build_and_insert``.

    All keyword arguments are optional so a feature can be instantiated
    without any arguments. An argument that is not supplied leaves the
    corresponding attribute unset.
    """

    def __init__(self, **kwargs):
        # Allow instantiation without kwargs. Each argument is copied on its
        # own so that a missing from_date no longer discards a to_date that
        # was supplied.
        for name in ("from_date", "to_date", "table_name"):
            if name in kwargs:
                setattr(self, name, kwargs[name])

        self.description = ""
        self.description_long = ""
        # The feature (and its prejoin table) is named after the concrete class.
        self.feature_name = self.__class__.__name__
        self.is_categorical = False
        self.is_label = False

        # self.query should return two columns, named 'dispatch_id' and '<feature_name>'
        self.query = None

        # self.update_query takes the feature query and materialises its
        # result as an unlogged table features_prejoin.<feature_name>, then
        # creates a unique index on dispatch_id and promotes it to the
        # table's primary key.
        self.update_query = ("CREATE UNLOGGED TABLE features_prejoin.{feature_name} "
                             " AS ({query}); "
                             "CREATE UNIQUE INDEX {feature_name}_index "
                             " ON features_prejoin.{feature_name} (dispatch_id);"
                             "ALTER TABLE features_prejoin.{feature_name} "
                             " ADD PRIMARY KEY USING INDEX {feature_name}_index;")

    def build_and_insert(self, engine):
        """Fill the ``update_query`` template and execute it.

        Args:
            engine: object exposing ``execute(sql)``; it is called once with
                the template formatted with ``feature_name`` and ``query``.
        """
        build_query = self.update_query.format(
            feature_name=self.feature_name,
            query=self.query)
        engine.execute(build_query)
class DispatchTimeBoundedFeature(DispatchFeature):
    """Dispatch feature subclass that adds no behaviour of its own here.

    NOTE(review): presumably a marker type for features restricted to the
    from_date/to_date window -- confirm against the features that derive
    from it.
    """