Python Example of Time Series Query

  • Updated

This example expands on the previous folder creation example. It creates a folder, uploads a file to the folder, creates a simSearch (Similarity Engine) instance, queries the instance, then deletes the instance, file, and folder. It takes the classic ‘Santa Fe A’ time series dataset and creates a windowed version for similarity searching. Ask your InRule Technology contact for access to the 1000 element dataset.

# Example ML Studio API usage
#
# This example program:
#  1. Reads a time series data file (the well known Santa Fe A sample)
#  2. Reformats the data into overlapping windows
#  3. Writes the windowed data as a new file and stores the data in a matrix
#  4. Creates a folder to hold the file in the ML Studio cloud
#  5. Uploads the windowed file to the ML Studio cloud
#  6. Creates a simSearch model based on the windowed data file
#  7. Waits for the simSearch model to be ready for queries
#  8. Queries with a partial window
#  9. Uses the query results to index into the matrix to find matches
# 10. Displays the results overlaid on the original data
# 11. Cleans up: removes the model instance, data file, and folder
#
# Since this cleans up the artifacts that were created, it can be rerun
# without any outside intervention.
#
# Requires running under Python 3.x to avoid SSL request issues.
#
# Copyright 2017-2021 (c) InRule Technology, Inc.

import base64       # For encoding password
import csv          # For writing the windowed file
import json         # For parsing JSON output from API status requests
import os           # For getting the size of the file to upload
import string       # For byte array to char array conversion
import time         # For sleeping between status requests

import requests     # For HTTP requests (http://docs.python-requests.org)
from requests.auth import HTTPBasicAuth  # Basic auth for all API requests

# Username and password is defined outside this file. Format
# of the credentials file:
#   busername = b'your username'
#   bpassword = b'your password'
#   username = 'your username'
#   password = 'your password'

import credentials as cred

# Network vars for requests

# Replace "YOURSERVER" or "YOURSERVER.simmachines.com" with your installation URL
cloudIpandPort = 'YOURSERVER.simmachines.com:8443'
rfprotocol = 'https'

# Build the 'Basic <base64(user:pass)>' value that the file/instance APIs
# expect in their 'authorization' form field.
authform = cred.busername + b':' + cred.bpassword
print('before b64encoding: ', authform)
filepassword = base64.b64encode(authform)
filepassword = filepassword.decode('ascii')
filepassword = 'Basic ' + filepassword
# NOTE(review): this prints credentials to stdout -- acceptable for a demo,
# but remove before using with real accounts.
print('filepassword: ', filepassword)

baseurl = rfprotocol + '://' + cloudIpandPort + '/cloud'

# REST endpoints used by this example, all rooted at the cloud base URL
createfolder_URL = f'{baseurl}/createFolder'
uploadfile_URL = f'{baseurl}/uploadFile'
createinstance_URL = f'{baseurl}/createInstance'
listinstances_URL = f'{baseurl}/listInstances'
query_URL = f'{baseurl}/query'
preRemoveInstance_URL = f'{baseurl}/preRemoveInstance'
removeInstance_URL = f'{baseurl}/removeInstance'
removeFile_URL = f'{baseurl}/removeFile'
removeFolder_URL = f'{baseurl}/removeFolder'

# Local data files and the cloud artifact names this example creates
filebasename = 'SF_A'
original_filename = filebasename + '.dat'
windowed_filename = filebasename + '_reformated_windows' + '.csv'
folder_name = 'TimeseriesFolder'
instance_name = 'TimeseriesModel'
window = 5  # observations per sliding window

# Read the raw Santa Fe A series: the whole series arrives as one CSV row.
with open(original_filename, 'r') as f:
    reader = csv.reader(f)
    dlist = list(reader)
# dlist = [[ list of data ]]
timeseries_data = dlist[0]
wts_data = []  # will store the windowed time series data

# Rewrite the series as overlapping windows of `window` observations each.
with open(windowed_filename, 'w') as r:
    # Header: one ID column plus one column per observation in a window
    # (generated from `window` instead of hard-coding five names)
    r.write('Window,' + ','.join('Obs' + str(k + 1)
                                 for k in range(window)) + '\n')
    # Write one window row per starting position
    for i in range(len(timeseries_data) - (window - 1)):
        # Keep the FULL window. (The original code appended only
        # window-1 values to wts_data, dropping the final observation
        # of each window -- exactly the value the query predicts.)
        wts_row = timeseries_data[i:i + window]
        wts_data.append(wts_row)
        r.write(str(i) + ',' + ','.join(wts_row) + '\n')
# No explicit flush()/close(): the with-blocks handle both.

# Create the cloud folder that will hold the windowed data file
print('createfolder_URL:', createfolder_URL)
folder_request = {'folderName': folder_name}
print('createfolder_data: ', folder_request)

resp = requests.post(createfolder_URL, data=folder_request,
                     auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

# Upload the windowed CSV file into the folder created above
print('uploadfileURL:', uploadfile_URL)

# The uploadFile API requires the file size up front
filesize = os.path.getsize(windowed_filename)
print('Size of SFA.csv file: ', filesize)

# Stream the file contents as multipart form data
with open(windowed_filename, 'rb') as upload_stream:
    resp = requests.post(
        uploadfile_URL,
        files={'fileData': upload_stream},
        data={'fileName': windowed_filename,
              'fileSize': filesize,
              'folderName': folder_name,
              'authorization': filepassword},
        auth=HTTPBasicAuth(cred.username, cred.password))
    print("Response: ", resp)
    print("Response: ", resp.content)

print('')

# Create a simSearch model instance from the uploaded windowed file
print('createinstance_URL:', createinstance_URL)

# 'params' carries the engine configuration: the column schema followed by
# the simSearch tuning parameters, joined with the _@_@_ separator.
model_spec = {
    "instanceName": instance_name,
    "folderName": folder_name,
    "modelType": "simSearch",
    "params":
"COLUMNS=Window:ID,Obs1:REAL,Obs2:REAL,Obs3:REAL,Obs4:REAL,Obs5:REAL_@_@_K=10_@_@_PIVOTS=256_@_@_PROBABILITY=0.95_@_@_ACCEPTED_ERROR=1.2_@_@_PIVOT_SAMPLE_SIZE=20000_@_@_CACHE_SIZE=1000000_@_@_INDEX_COUNT=3_@_@_MAXIMUM_BYTES_PER_OBJECT=500000_@_@_INDEX_SAMPLE_SIZE=100",
    "storage": 1,
    "parallelism": 2,
    "authorization": filepassword
}
print('instance_data:', model_spec)

resp = requests.post(createinstance_URL, data=model_spec,
                     auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

print('')

print('listinstances_URL:', listinstances_URL)

def getStatus(instance_str):
    """Return the status string of the named model instance.

    Calls the listInstances API and scans its JSON 'list' for the first
    entry whose 'label' equals instance_str; returns that entry's
    'status', or 'unknown' when no entry matches.
    """
    reply = requests.get(listinstances_URL,
                         auth=HTTPBasicAuth(cred.username, cred.password))
    instances = json.loads(reply.content.decode('ascii'))['list']
    return next((entry['status'] for entry in instances
                 if entry['label'] == instance_str),
                'unknown')

print('')

# Poll the instance status until it reaches RUNNING, or give up.
# (This is a small file, so a few seconds per check is reasonable;
# for a larger training dataset this wait time might be minutes.)

delay_for_check = 3  # seconds between status polls
max_checks = 20      # give up after this many polls
for i in range(max_checks):
    time.sleep(delay_for_check)
    if getStatus(instance_name) == "RUNNING":
        break
else:
    # Loop exhausted without break: the instance never reached RUNNING.
    # (The original `if i >= max_checks` test could never fire, since
    # the loop index tops out at max_checks - 1.)
    print('ERROR: After maximum checks ' + str(max_checks)
          + ', status is still not RUNNING')
    exit(1)

print('loops: ', i)
print('status: ', getStatus(instance_name))

# Query with a partial window: four observations, last value unknown
print('query_URL:', query_URL)
query_payload = {
    "instanceName": instance_name,
    "version": "V1",
    "query": "Window\tObs1\tObs2\tObs3\tObs4\tObs5\n\t73\t30\t20\t19\t",
    "authorization": filepassword
}
resp = requests.post(query_URL, data=query_payload,
                     auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

# Each comma-separated chunk of the response begins with a window index
# token; collect those indices as ints for lookup into wts_data.
answer_list = []
for chunk in resp.content.decode().split(','):
    tokens = chunk.split()
    answer_list.append(int(tokens[0]))

print('\nQuery:')
print(' [73, 30, 20, 19, ?]')
print('\nPredictions:')
for answer in answer_list:
    print('Answer: ', answer)
    print(' Window: ', wts_data[answer])

# Clean up
# Delete Model Instance: preRemoveInstance must be called before removeInstance
print('preRemoveInstance_URL:', preRemoveInstance_URL)

pre_remove_payload = {
    "instanceName": instance_name,
    "version": "V1",
    "authorization": filepassword
}
print('preRemove_instance_data:', pre_remove_payload)

resp = requests.post(preRemoveInstance_URL, data=pre_remove_payload,
                     auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

print('\nremoveInstance_URL:', removeInstance_URL)

# removeInstance is an HTTP DELETE; parameters travel on the query string
instance_removal = {
    'instanceName': instance_name,
    'version': 'V1',
    'forceDelete': 'true',
    'authorization': filepassword
}
print('remove_instance_data:', instance_removal)

resp = requests.delete(removeInstance_URL, params=instance_removal,
                       auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

# Delete File (HTTP DELETE with query-string parameters)
print('\nremoveFile_URL:', removeFile_URL)
file_removal = {'fileName': windowed_filename,
                'folderName': folder_name,
                'authorization': filepassword}
print('remove_file_data:', file_removal)

resp = requests.delete(removeFile_URL, params=file_removal,
                       auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

# Delete Folder (last: the folder must be empty of files first)
print('\nremoveFolder_URL:', removeFolder_URL)
folder_removal = {'folderName': folder_name,
                  'authorization': filepassword}
print('remove_folder_data:', folder_removal)

resp = requests.delete(removeFolder_URL, params=folder_removal,
                       auth=HTTPBasicAuth(cred.username, cred.password))
print("Response: ", resp)
print("Response: ", resp.content)

Was this article helpful?

0 out of 0 found this helpful

Comments

0 comments

Please sign in to leave a comment.