# This example expands on the previous folder-creation example. It creates a folder, uploads a file to the folder, creates a simSearch (Similarity Engine) instance, queries the instance, then deletes the instance, file and folder. It takes the classic 'Santa Fe A' time series dataset and creates a windowed version for similarity searching. Ask your InRule Technology contact for access to the 1000-element dataset.
# Example ML Studio API usage
#
# This example program:
# 1. Reads a time series data file (the well known Santa Fe A sample)
# 2. Reformats the data into overlapping windows
# 3. Writes the windowed data as a new file and stores the data in a matrix
# 4. Creates a folder to hold the file in the ML Studio cloud
# 5. Uploads the windowed file to the ML Studio cloud
# 6. Creates a simSearch model based on the windowed data file
# 7. Waits for the simSearch model to be ready for queries
# 8. Queries with a partial window
# 9. Uses the query results to index into the matrix to find matches
# 10. Displays the results overlaid on the original data
# 11. Cleans up: remove model instance, data file, foldername
#
# Since this cleans up the artifacts that were created, it can be rerun
# without any outside intervention.
#
# Requires running under Python 3.x to avoid SSL request issues.
#
# Copyright 2017-2021 (c) InRule Technology, Inc.
# Standard library
import base64 # For encoding password
import csv # For writing the windowed file
import json # For parsing JSON output from API status requests
import os # For getting the size of the file to upload
import string # For byte array to char array conversion
import time # For sleeping between status requests

# Third-party
import requests # For HTTP requests (http://docs.python-requests.org)
from requests.auth import HTTPBasicAuth # For HTTP Basic auth on each request

# Local
# Username and password is defined outside this file. Format
# of the credentials file:
# busername = b'your username'
# bpassword = b'your password'
# username = 'your username'
# password = 'your password'
import credentials as cred
# Network vars for requests
# Replace "YOURSERVER" or "YOURSERVER.simmachines.com" with your installation URL
cloudIpandPort = 'YOURSERVER.simmachines.com:8443'
rfprotocol = 'https'

# Build the HTTP Basic credential string ("Basic <base64(user:pass)>")
# that several API endpoints expect in their 'authorization' form field.
# (An unused b64password intermediate was removed here: it was computed
# but never referenced anywhere in the script.)
authform = cred.busername + b':' + cred.bpassword
# NOTE(review): printing raw and encoded credentials to the console is
# acceptable for a demo, but remove these prints in production code.
print('before b64encoding: ', authform)
filepassword = base64.b64encode(authform)
filepassword = filepassword.decode('ascii')
filepassword = 'Basic ' + filepassword
print('filepassword: ', filepassword)

# API endpoint URLs, all rooted at the /cloud base path.
baseurl = rfprotocol + '://' + cloudIpandPort + '/cloud'
createfolder_URL = baseurl + '/createFolder'
uploadfile_URL = baseurl + '/uploadFile'
createinstance_URL = baseurl + '/createInstance'
listinstances_URL = baseurl + '/listInstances'
query_URL = baseurl + '/query'
preRemoveInstance_URL = baseurl + '/preRemoveInstance'
removeInstance_URL = baseurl + '/removeInstance'
removeFile_URL = baseurl + '/removeFile'
removeFolder_URL = baseurl + '/removeFolder'
# Names of the local/cloud artifacts used throughout the example.
filebasename = 'SF_A'
original_filename = filebasename + '.dat'
windowed_filename = filebasename + '_reformated_windows' + '.csv'
folder_name = 'TimeseriesFolder'
instance_name = 'TimeseriesModel'
window = 5  # observations per sliding window

# Read the raw time series; the file is a single CSV row of observations.
with open(original_filename, 'r') as f:
    reader = csv.reader(f)
    dlist = list(reader)
# dlist = [[ list of data ]]
timeseries_data = dlist[0]

# Windowed time series matrix; row i is the window starting at observation i.
wts_data = []
with open(windowed_filename, 'w') as r:
    # Header derived from the window size (Window,Obs1..ObsN) instead of a
    # hard-coded 5-column literal, so changing `window` stays consistent.
    r.write('Window,' + ','.join('Obs%d' % (k + 1) for k in range(window)) + '\n')
    # One row per overlapping window of `window` consecutive observations.
    for i in range(len(timeseries_data) - (window - 1)):
        # BUG FIX: the original stored only the first window-1 observations
        # in wts_row (range(4)), so the matched windows displayed after the
        # query were missing their final (predicted) value.
        wts_row = [timeseries_data[i + j] for j in range(window)]
        r.write(str(i) + ',' + ','.join(wts_row) + '\n')
        wts_data.append(wts_row)
# (The explicit flush/close calls were redundant: `with` closes the file.)
# Create the cloud folder that will hold the uploaded data file.
print('createfolder_URL:', createfolder_URL)
folder_payload = {'folderName': folder_name}
print('createfolder_data: ', folder_payload)
resp = requests.post(
    createfolder_URL,
    data=folder_payload,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)
# Upload the windowed CSV into the folder created above.
print('uploadfileURL:', uploadfile_URL)
# The uploadFile API requires the byte size of the payload up front.
filesize = os.path.getsize(windowed_filename)
print('Size of SFA.csv file: ', filesize)
# Stream the file contents as multipart form data.
with open(windowed_filename, 'rb') as stream:
    resp = requests.post(
        uploadfile_URL,
        files={'fileData': stream},
        data={
            'fileName': windowed_filename,
            'fileSize': filesize,
            'folderName': folder_name,
            'authorization': filepassword,
        },
        auth=HTTPBasicAuth(cred.username, cred.password),
    )
print("Response: ", resp)
print("Response: ", resp.content)
print('')
# Create a simSearch model instance trained on the uploaded windowed file.
print('createinstance_URL:', createinstance_URL)
instance_data = {
    'instanceName': instance_name,
    'folderName': folder_name,
    'modelType': 'simSearch',
    # Engine parameters: column schema plus simSearch tuning values,
    # joined with the "_@_@_" separator the API expects.
    'params': "COLUMNS=Window:ID,Obs1:REAL,Obs2:REAL,Obs3:REAL,Obs4:REAL,Obs5:REAL_@_@_K=10_@_@_PIVOTS=256_@_@_PROBABILITY=0.95_@_@_ACCEPTED_ERROR=1.2_@_@_PIVOT_SAMPLE_SIZE=20000_@_@_CACHE_SIZE=1000000_@_@_INDEX_COUNT=3_@_@_MAXIMUM_BYTES_PER_OBJECT=500000_@_@_INDEX_SAMPLE_SIZE=100",
    'storage': 1,
    'parallelism': 2,
    'authorization': filepassword,
}
print('instance_data:', instance_data)
resp = requests.post(
    createinstance_URL,
    data=instance_data,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)
print('')
print('listinstances_URL:', listinstances_URL)


def getStatus(instance_str):
    """Return the status string of the named model instance.

    Queries the listInstances API and scans the returned instance list
    for an entry whose label matches instance_str.

    :param instance_str: label of the model instance to look up
    :return: the instance's status, or 'unknown' if not found
    """
    resp = requests.get(listinstances_URL,
                        auth=HTTPBasicAuth(cred.username, cred.password))
    # Use requests' built-in JSON decoding rather than a manual
    # .decode('ascii'), which would raise on any non-ASCII byte.
    for entry in resp.json()['list']:
        if entry['label'] == instance_str:
            return entry['status']
    return 'unknown'
print('')
# Poll the instance status until it reports RUNNING.
# (This is a small file, so a few seconds per check is reasonable;
# for a larger training dataset this wait time might be minutes.)
delay_for_check = 3  # seconds between status checks
max_checks = 20
for i in range(max_checks):
    time.sleep(delay_for_check)
    if getStatus(instance_name) == "RUNNING":
        break
else:
    # BUG FIX: the original tested `i >= max_checks`, a value range()
    # can never produce, so this timeout path was unreachable.  The
    # for/else runs only when the loop exhausts without break-ing.
    print('ERROR: After maximum checks ' + str(max_checks)
          + ', status is still not RUNNING')
    exit(1)
print('loops: ', i)
print('status: ', getStatus(instance_name))
# Query with a partial window: the first 4 observations are supplied,
# the 5th is left blank for the engine to match against.
print('query_URL:', query_URL)
query_data = {
    "instanceName": instance_name,
    "version": "V1",
    "query": "Window\tObs1\tObs2\tObs3\tObs4\tObs5\n\t73\t30\t20\t19\t",
    "authorization": filepassword
}
resp = requests.post(query_URL, data=query_data,
                     auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)
# Each comma-separated result chunk starts with the matching window's ID;
# use it to index back into the windowed data matrix.
answer_list = [int(chunk.split()[0])
               for chunk in resp.content.decode().split(',')]
print('\nQuery:')
print(' [73, 30, 20, 19, ?]')
print('\nPredictions:')
for answer in answer_list:
    print('Answer: ', answer)
    print(' Window: ', wts_data[answer])
# --- Clean up: delete the model instance ---
# preRemoveInstance must be called before removeInstance.
print('preRemoveInstance_URL:', preRemoveInstance_URL)
pre_remove_payload = {
    "instanceName": instance_name,
    "version": "V1",
    "authorization": filepassword
}
print('preRemove_instance_data:', pre_remove_payload)
resp = requests.post(
    preRemoveInstance_URL,
    data=pre_remove_payload,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)

print('\nremoveInstance_URL:', removeInstance_URL)
remove_payload = {
    'instanceName': instance_name,
    'version': 'V1',
    'forceDelete': 'true',
    'authorization': filepassword
}
print('remove_instance_data:', remove_payload)
resp = requests.delete(
    removeInstance_URL,
    params=remove_payload,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)
# --- Clean up: delete the uploaded file, then its folder ---
print('\nremoveFile_URL:', removeFile_URL)
file_removal = {'fileName': windowed_filename,
                'folderName': folder_name,
                'authorization': filepassword}
print('remove_file_data:', file_removal)
resp = requests.delete(
    removeFile_URL,
    params=file_removal,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)

print('\nremoveFolder_URL:', removeFolder_URL)
folder_removal = {'folderName': folder_name,
                  'authorization': filepassword}
print('remove_folder_data:', folder_removal)
resp = requests.delete(
    removeFolder_URL,
    params=folder_removal,
    auth=HTTPBasicAuth(cred.username, cred.password),
)
print("Response: ", resp)
print("Response: ", resp.content)
# End of example.