>>> import theano
>>> a = theano.tensor.vector("a") # declare a symbolic vector variable named "a"
>>> b = a + a ** 10 # build the symbolic expression a + a ** 10
>>> f = theano.function([a], b) # compile a callable that takes a and returns b
>>> print f([0, 1, 2]) # prints `array([0, 2, 1026])`
| Unoptimized graph | Optimized graph |
|---|---|
| ![Unoptimized graph]() | ![Optimized graph]() |
Symbolic programming = Paradigm shift: people need to use it to understand it.
import theano
a = theano.tensor.vector() # declare variable
out = a + a ** 10 # build symbolic expression
f = theano.function([a], out) # compile function
print f([0, 1, 2])
# prints `array([0, 2, 1026])`
theano.printing.pydotprint_variables(b, outfile="f_unoptimized.png", var_with_name_simple=True)
theano.printing.pydotprint(f, outfile="f_optimized.png", var_with_name_simple=True)
Modify and execute the example to compute this expression: a ** 2 + b ** 2 + 2 * a * b
Logistic Regression
import numpy
import theano
import theano.tensor as tt
rng = numpy.random
N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000
# Declare Theano symbolic variables
x = tt.matrix("x")
y = tt.vector("y")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
print "Initial model:"
print w.get_value(), b.get_value()
# Construct Theano expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) # Cross-entropy loss
cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize
gw, gb = tt.grad(cost, [w, b])
# Compile
train = theano.function(
inputs=[x, y],
outputs=[prediction, xent],
updates=[(w, w - 0.1 * gw),
(b, b - 0.1 * gb)],
name='train')
predict = theano.function(inputs=[x], outputs=prediction,
name='predict')
# Train
for i in range(training_steps):
pred, err = train(D[0], D[1])
print "Final model:"
print w.get_value(), b.get_value()
print "target values for D:", D[1]
print "prediction on D:", predict(D[0])
Optimizations:
Where are those optimizations applied?
# Excerpt of the logistic-regression graph; the comments mark the
# expressions that the graph optimizations rewrite.
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b))
# 1 / (1 + tt.exp(-var)) -> sigmoid(var)
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1)
# log(1 - sigmoid(var)) -> -softplus(var)
prediction = p_1 > 0.5
cost = xent.mean() + 0.01 * (w ** 2).sum()
gw,gb = tt.grad(cost, [w, b])
train = theano.function(
inputs=[x, y],
outputs=[prediction, xent],
# w - 0.1 * gw: merged with the dot product in the gradient into one GEMV
updates=[(w, w - 0.1 * gw),
(b, b - 0.1 * gb)])
Theano can be configured with flags. They can be defined in two ways
import numpy
import theano
import theano.tensor as tt
rng = numpy.random
N = 400
feats = 784
D = (rng.randn(N, feats).astype(theano.config.floatX),
rng.randint(size=N,low=0, high=2).astype(theano.config.floatX))
training_steps = 10000
# Declare Theano symbolic variables
x = tt.matrix("x")
y = tt.vector("y")
w = theano.shared(rng.randn(feats).astype(theano.config.floatX), name="w")
b = theano.shared(numpy.asarray(0., dtype=theano.config.floatX), name="b")
x.tag.test_value = D[0]
y.tag.test_value = D[1]
#print "Initial model:"
#print w.get_value(), b.get_value()
# Construct Theano expression graph
p_1 = 1 / (1 + tt.exp(-tt.dot(x, w) - b)) # Probability of having a one
prediction = p_1 > 0.5 # The prediction that is done: 0 or 1
xent = -y * tt.log(p_1) - (1 - y) * tt.log(1 - p_1) # Cross-entropy
cost = xent.mean() + 0.01 * (w**2).sum() # The cost to optimize
gw,gb = tt.grad(cost, [w, b])
# Compile expressions to functions
train = theano.function(
inputs=[x, y],
outputs=[prediction, xent],
updates={w: w - 0.01 * gw, b: b - 0.01 * gb},
name="train")
predict = theano.function(inputs=[x], outputs=prediction,
name="predict")
if any([x.op.__class__.__name__=='Gemv' for x in
train.maker.fgraph.toposort()]):
print 'Used the cpu'
elif any([x.op.__class__.__name__=='GpuGemm' for x in
train.maker.fgraph.toposort()]):
print 'Used the gpu'
else:
print 'ERROR, not able to tell if theano used the cpu or the gpu'
print train.maker.fgraph.toposort()
for i in range(training_steps):
pred, err = train(D[0], D[1])
#print "Final model:"
#print w.get_value(), b.get_value()
print "target values for D"
print D[1]
print "prediction on D"
print predict(D[0])
# Print the graph used in the slides
theano.printing.pydotprint(predict,
outfile="pics/logreg_pydotprint_predic.png",
var_with_name_simple=True)
theano.printing.pydotprint_variables(prediction,
outfile="pics/logreg_pydotprint_prediction.png",
var_with_name_simple=True)
theano.printing.pydotprint(train,
outfile="pics/logreg_pydotprint_train.png",
var_with_name_simple=True)
Modify and execute the example to run on CPU with floatX=float32
- Use device=gpu{0, 1, ...} to specify which GPU if you have more than one
- Shared variables with float32 dtype are by default moved to the GPU memory space
- Be sure to use floatX (theano.config.floatX) in your code
- Cast inputs before putting them into a shared variable
- Cast “problem”: int32 combined with float32 is upcast to float64
- Insert manual cast in your code or use [u]int{8,16}
- Work is in progress so that the mean operator keeps its output in float32.
- tt.scalar, tt.vector, tt.matrix, tt.tensor3, tt.tensor4
- tt.[fdczbwil]vector (float32, float64, complex64, complex128, int8, int16, int32, int64)
- tt.vector uses the floatX dtype
- floatX: configurable dtype that can be float32 or float64.
- All are shortcuts to: tt.tensor(dtype, broadcastable=[False]*nd)
- Other dtypes: uint[8,16,32,64], floatX
Creating symbolic variables: Broadcastability
Details regarding symbolic broadcasting...
>>> gw,gb = tt.grad(cost, [w,b]) # symbolic gradients of cost with respect to w and b
Competitors: NumPy + SciPy, MATLAB, EBLearn, Torch5, numexpr