# File: searchTimer.py # Purpose: Compare the runtime of searching operations # Author: Tanya Amert import matplotlib.pyplot as plt import random import time def linearSearch(mylist, elt): """ Performs linear search on a list, looking for a given element elt. Returns: the index of elt in mylist, or -1 if it is not found. """ for i in range(len(mylist)): if mylist[i] == elt: return i return -1 def binarySearch(mylist, elt): """ Performs binary search on a sorted list, looking for a given element elt. Returns: the index of elt in mylist, or -1 if it is not found. """ low = 0 high = len(mylist) - 1 # Go until the range flips (then you know you haven't found it) while low <= high: mid = (low+high) // 2 # choose the middle index item = mylist[mid] if item == elt: # yay! return mid elif elt < item: # left half high = mid-1 else: # right half low = mid+1 # No dice return -1 def runTrials(n, numTrials): """ Times searching a dictionary or sorted list of size n over numTrials trials. Returns: a dict mapping approach (str) to average runtime (float, in seconds) """ sortedList = list(range(n)) dictionary = {} for val in sortedList: dictionary[val] = val*2 # the values don't matter here time_dict_in = 0 time_dict_notin = 0 time_list_in = 0 time_list_notin = 0 time_ls_in = 0 time_ls_notin = 0 time_bs_in = 0 time_bs_notin = 0 for i in range(numTrials): element = random.choice(sortedList) # Look for an element using *in* with a dictionary t = time.time() check = element in dictionary time_dict_in += (time.time() - t) # Look for a missing element using *in* with a dictionary t = time.time() check = -1 in dictionary time_dict_notin += (time.time() - t) # Look for an element using *in* with a list t = time.time() check = element in sortedList time_list_in += (time.time() - t) # Look for a missing element using *in* with a list t = time.time() check = -1 in sortedList time_list_notin += (time.time() - t) # Look for the element using linear search t = time.time() linearSearch(sortedList, element) time_ls_in += (time.time() - t) # Look for a missing element using linear search t = time.time() linearSearch(sortedList, -1) time_ls_notin += (time.time() - t) # Look for the element using binary search t = time.time() binarySearch(sortedList, element) time_bs_in += (time.time() - t) # Look for a missing element using binary search t = time.time() binarySearch(sortedList, -1) time_bs_notin += (time.time() - t) return {"in_dict_found": time_dict_in, "in_dict_notfound": time_dict_notin, "in_list_found": time_list_in, "in_list_notfound": time_list_notin, "linearsearch_found": time_ls_in, "linearsearch_notfound": time_ls_notin, "binarysearch_found": time_bs_in, "binarysearch_notfound": time_bs_notin} def timeSortedIn(): """ Uses "in" operator on dictionaries or sorted lists of different sizes, and plots the runtimes. """ # Parameters nvals = [10, 100, 1000, 10000, 100000, 1000000] num_trials = 1000 # Prep lists to store results all_times_in_dict = [] all_times_notin_dict = [] all_times_in_list = [] all_times_notin_list = [] all_times_in_linearsearch = [] all_times_notin_linearsearch = [] all_times_in_binarysearch = [] all_times_notin_binarysearch = [] # Try all different values of n for n in nvals: # Get data for this value of n print("Working on n:", n) trial_results = runTrials(n, num_trials) # Append the average time to the appropriate lists all_times_in_dict.append(trial_results["in_dict_found"] / num_trials * 1000) all_times_notin_dict.append(trial_results["in_dict_notfound"] / num_trials * 1000) all_times_in_list.append(trial_results["in_list_found"] / num_trials * 1000) all_times_notin_list.append(trial_results["in_list_notfound"] / num_trials * 1000) all_times_in_binarysearch.append(trial_results["binarysearch_found"] / num_trials * 1000) all_times_notin_binarysearch.append(trial_results["binarysearch_notfound"] / num_trials * 1000) all_times_in_linearsearch.append(trial_results["linearsearch_found"] / num_trials * 1000) all_times_notin_linearsearch.append(trial_results["linearsearch_notfound"] / num_trials * 1000) # Make the plot plt.figure() plt.plot(nvals, all_times_notin_linearsearch, label="linear search - not found (ms)", marker='^') plt.plot(nvals, all_times_in_linearsearch, label="linear search - found (ms)", marker='s') plt.plot(nvals, all_times_notin_list, label="in with list - not found (ms)", marker='x') plt.plot(nvals, all_times_in_list, label="in with list - found (ms)", marker='o') plt.plot(nvals, all_times_notin_binarysearch, label="binary search - not found (ms)", marker='2') plt.plot(nvals, all_times_in_binarysearch, label="binary search - found (ms)", marker='*') plt.plot(nvals, all_times_notin_dict, label="in with dict - not found (ms)", marker='d') plt.plot(nvals, all_times_in_dict, label="in with dict - found (ms)", marker='+') # Spiff up our plot plt.title("Time to search as n gets really big") plt.xlabel("Number of elements in the list/dict (note HUGE scale)") plt.ylabel("Search time (ms)") plt.legend() plt.show() def main(): timeSortedIn() if __name__ == "__main__": main()