"""Convert flattened text files into JSON lists of text chunks.

Pass a ``.txt`` file, or a folder containing ``.txt`` files, as the first
command-line argument.  Each file is read, its newlines and tabs are turned
into spaces, and the text is cut at every run of two or more spaces.  The
chunks are written to ``Output/<path>.json``.  When the chunks contain a
``CAST`` entry followed later by a ``STAR TREK: THE NEXT GENERATION`` entry,
the chunks in between are also written to ``Output/<name>-Casts.json``.
"""
import json
import os
import re
import sys

import Analyser  # not referenced in this file; kept because its import may matter elsewhere

### VARIABLES
# Legacy module-level chunk list. The conversion now collects chunks in a
# local list per file; the name is kept so existing references still resolve.
Cast = []
string = "sys.argv[1]"  # unused placeholder (a literal string, not the actual argument)
temp = ""  # unused temp string
# Space runs of increasing length (2 up to 101 spaces). splitter() no longer
# needs them, but the lists stay available for any code that reads them.
spaces = []
spacesRev = []
space = " "  # seed used to build the spaces list
# Sentinel returned by splitter() once there is nothing left to split.
finishstring = "FINISHEDTHESTRINGS"

# Chunk that closes the cast list in the scripts handled here.
_CAST_END_MARKER = "STAR TREK: THE NEXT GENERATION"
# A split point is any run of two or more spaces.
_SPACE_RUN = re.compile(r" {2,}")

### VAR INITIATORS
## Fills the spaces list with the space runs we may want to split on.
for _ in range(100):
    space = space + " "
    spaces.append(space)

spacesRev.extend(reversed(spaces))


### FUNCTIONS
def splitter(string, array):
    """Cut *string* at its first run of two or more spaces.

    The text before the run is appended to *array* and the text after the
    run is returned.  When *string* contains no such run, the whole string
    is appended and the ``finishstring`` sentinel is returned instead.
    """
    # The previous implementation tried every run length from short to long
    # and reset its result whenever a length was missing, so a split point
    # was only kept when the text contained a run of at least 101 spaces.
    # One greedy pattern finds the first run and consumes all of it.
    match = _SPACE_RUN.search(string)
    if match is None:
        array.append(string)
        return finishstring
    array.append(string[:match.start()])
    return string[match.end():]


def _dump_json(path, data):
    """Write *data* as JSON to *path*, creating the parent directory if needed."""
    parent = os.path.dirname(path)
    if parent:
        # Without this the write fails when Output/ (or a sub-folder of it)
        # has not been created by hand beforehand.
        os.makedirs(parent, exist_ok=True)
    with open(path, "w") as outfile:
        json.dump(data, outfile)


def _convert(source, casts_name):
    """Split the text file *source* into chunks and write the JSON outputs.

    All chunks go to ``Output/<source>.json``.  If a cast list is found, it
    goes to ``Output/<casts_name>-Casts.json``.
    """
    with open(source, "r") as infile:
        text = infile.read().replace('\n', ' ')
    text = text.replace('\t', ' ')

    chunks = []
    x = 0  # length of the remainder after the most recent split
    while text != finishstring:
        text = splitter(text, chunks)
        # The remainder can only grow when the sentinel replaced a shorter
        # remainder; stop there so x keeps the last real remainder length.
        if x != 0 and x < len(text):
            break
        x = len(text)

    print('only ' + str(x) + " characters left from file " + source
          + ", rest is being written to Output/" + source + '.json')
    _dump_json('Output/' + source + '.json', chunks)

    # The cast list is everything between the "CAST" chunk and the next
    # end-marker chunk; skip the extra file when either one is missing.
    if "CAST" not in chunks:
        return
    cast_index = chunks.index("CAST")
    after_cast = chunks[cast_index:]
    if _CAST_END_MARKER not in after_cast:
        return
    end_offset = after_cast.index(_CAST_END_MARKER)  # relative to cast_index
    print(cast_index)
    print(end_offset)
    cast_members = after_cast[1:end_offset]
    print(cast_members)
    _dump_json('Output/' + casts_name + '-Casts.json', cast_members)


def Folder(folder):
    """Convert every ``.txt`` file found directly inside *folder*."""
    for file in os.listdir(folder):
        if file.endswith(".txt"):
            _convert(folder + "/" + file, file)


def File(file):
    """Convert the single text file at path *file*."""
    _convert(file, file)


def Main():
    """Dispatch on the first command-line argument: folder, file, or neither."""
    if len(sys.argv) > 1:
        if os.path.isdir(sys.argv[1]):
            print("folder")
            Folder(sys.argv[1])
        # elif: a folder is not a file, so a plain second "if" reported
        # "Invalid file" after every successful folder run.
        elif os.path.isfile(sys.argv[1]):
            print("file")
            File(sys.argv[1])
        else:
            print("Invalid file")
            input()  # wait for Enter so the message can be read
    else:
        print('no argument')
        input()  # wait for Enter so the message can be read


# Runs on import as well as on direct execution, as the script always has.
Main()