The dataset is called "Creative Class County Codes" on Data.gov. I used the same zip codes as in the earlier project and converted them to counties using a spreadsheet downloaded from UnitedStatesZipCodes.org. The code follows:
#A helper function that is necessary to make sure the cost function never becomes infinite
def safeLog(array):
    """Return the element-wise natural log of ``array`` with zeros clamped.

    Entries equal to zero map to -1000000 instead of ``-inf`` so that a cost
    built from these logs stays finite. Negative entries are passed straight
    to ``np.log`` and therefore come back as ``nan``.

    Args:
        array: Scalar or array-like of numbers.

    Returns:
        A numpy array with the same shape as ``array``; floating point for
        real-valued input.
    """
    import numpy as np

    values = np.asarray(array)
    # np.log(0) yields -inf and emits a divide warning; those entries are
    # replaced by the sentinel below, so only that warning is silenced.
    with np.errstate(divide="ignore"):
        logs = np.log(values)
    # Working on the whole array (instead of np.vectorize, which infers the
    # output dtype from the first element) keeps the result floating point
    # even when the first entry is zero, and also accepts empty input.
    return np.where(values == 0, -1000000.0, logs)
#The cost function that is to be minimized
def cost(theta,X,indicator):
    """Return the mean negative log-likelihood for parameters ``theta``.

    Computes ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` where ``m`` is
    ``len(X)``, ``y`` is ``indicator`` and ``h = sig.sigmoid(np.dot(X, theta))``.

    Args:
        theta: Parameter vector multiplied against ``X``.
        X: Design matrix; one row per sample (``len(X)`` is used as the
            sample count).
        indicator: Per-sample labels. Presumably 0/1 values with the same
            shape as the sigmoid output -- TODO confirm against callers.

    Returns:
        The scalar cost.
    """
    import numpy as np
    # NOTE(review): ``sigmoid`` and ``safeLog`` are project-local modules not
    # shown here; ``sig.sigmoid`` is presumably the logistic function.
    import sigmoid as sig
    import safeLog as sl

    labels = np.asarray(indicator)
    output = sig.sigmoid(np.dot(X, theta))
    first = np.sum(labels * sl.safeLog(output))
    last = np.sum((1.0 - labels) * sl.safeLog(1.0 - output))
    # Float literal so the normalization is true division on every Python
    # version (an integer -1/len(X) floors to -1 under Python 2), matching
    # the 1.0/len(X) used by costGradient.
    return -1.0 / len(X) * (first + last)
#The cost gradient function that is useful for some minimizers
def costGradient(theta,X,indicator):
    """Return the gradient of the cost with respect to ``theta``.

    Evaluates ``(1/m) * X.T . (h - indicator)`` where ``m`` is ``len(X)`` and
    ``h = sig.sigmoid(np.dot(X, theta))``.

    Args:
        theta: Parameter vector multiplied against ``X``.
        X: Design matrix; must expose ``.T`` (e.g. a numpy array).
        indicator: Per-sample labels subtracted from the sigmoid output.

    Returns:
        The result of ``np.dot(X.T, h - indicator)`` scaled by ``1/len(X)``.
    """
    import numpy as np
    # ``sigmoid`` is a project-local module that is not shown here.
    import sigmoid as sig

    predictions = sig.sigmoid(np.dot(X, theta))
    residual = predictions - indicator
    scale = 1.0 / len(X)
    return scale * np.dot(X.T, residual)
No comments:
Post a Comment