Layer-wise accurate timing
import time
def stat_helper(name, array):
    """Executor monitor callback that prints per-output elapsed time.

    Wraps the raw handle handed over by the executor in a read-only
    ``NDArray``, waits until that array can be read, then prints the output
    name, its shape and the milliseconds elapsed since
    ``stat_helper.start_time``. The timer is restarted afterwards so the
    next invocation reports only its own interval.

    ``stat_helper.start_time`` must be initialised (with ``time.time()``)
    before the first callback fires.

    Args:
        name: Output name supplied by the executor. It is passed through
            ``py_str`` before printing (presumably it arrives as bytes
            under Python 3 -- confirm against the MXNet version in use).
        array: Raw array handle; it is cast to ``NDArrayHandle``.
    """
    import ctypes
    from mxnet.ndarray import NDArray
    from mxnet.base import NDArrayHandle, py_str

    handle = ctypes.cast(array, NDArrayHandle)
    output = NDArray(handle, writable=False)

    # Wait for the array to become readable before reading the clock, so the
    # measured interval includes the work that produced this output.
    output.wait_to_read()

    elapsed_ms = (time.time() - stat_helper.start_time) * 1000
    print(py_str(name), output.shape, '%.1fms' % elapsed_ms)

    # Restart the timer for the next output.
    stat_helper.start_time = time.time()
# Start the clock before any output is reported, then register the timing
# callback on the executor.
stat_helper.start_time = time.time()
executor.set_monitor_callback(stat_helper)
Visualize Network
# Network visualization: plot the symbol for a single 3x224x224 'data' input
# and open the resulting graph.
data_shape = (1, 3, 224, 224)
dot = mx.viz.plot_network(fcnxs, shape={'data': data_shape})
dot.view()
Mean Subtraction
def transform(im, pixel_means):
    """Convert an H x W x C image into a mean-subtracted 1 x 3 x H x W tensor.

    The first three channels of ``im`` are emitted in reverse order
    (output channel ``i`` is input channel ``2 - i``), and each has the
    matching entry ``pixel_means[2 - i]`` subtracted from it.

    Args:
        im: Array-like image indexed as ``im[row, col, channel]`` with at
            least three channels.
        pixel_means: Per-channel means, indexed in the same channel order
            as ``im`` (``pixel_means[c]`` is subtracted from ``im[:, :, c]``).

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(1, 3, H, W)``.
    """
    # Promote to float64 *before* subtracting: with an unsigned integer image
    # and integer means the subtraction would otherwise be carried out in the
    # image's dtype and wrap around instead of going negative.
    pixels = np.asarray(im, dtype=np.float64)

    # Channels are visited as 2, 1, 0 to reverse the channel order.
    planes = [pixels[:, :, c] - pixel_means[c] for c in (2, 1, 0)]
    return np.stack(planes)[np.newaxis]
Building on Jetson TX1
- Install OpenBLAS package: libopenblas-dev
- Install CUDA Development Toolkit 8.0 along with CUDNN
- Build OpenCV-3.2.0 with GTK-2.0/TBB and without Qt/CUDA; disable BUILD_PERF_TESTS, BUILD_DOCS, BUILD_TESTS and BUILD_EXAMPLES
- Build MXNet-0.10.1 with CMAKE_BUILD_TYPE=Release, add -DMSHADOW_USE_SSE=0 to CMAKE_C_FLAGS_RELEASE and CMAKE_CXX_FLAGS_RELEASE
comments powered by Disqus