mirror of
https://github.com/bvanroll/startrek-ml.git
synced 2025-08-28 11:32:43 +00:00
119 lines
3.4 KiB
Python
119 lines
3.4 KiB
Python
import json
|
|
import sys, os
|
|
import Analyser
|
|
import re
|
|
|
|
|
|
|
|
### VARIABLES
|
|
|
|
Cast = [] #an array of cast members
|
|
string = "sys.argv[1]" #the location of the current file we want to convert
|
|
temp = "" #temp string
|
|
spaces = [] #an array of spaces we want to check since some of the formating is lost and newlines are replaced by a bunch of spaces
|
|
spacesRev = []
|
|
space = " " #a space var that is used to initiate the spaces list
|
|
finishstring = "FINISHEDTHESTRINGS" #finished string meant to check for when we run a huge string through a while loop, when it's done splitting everything, the main string will change name to finishedstring
|
|
|
|
|
|
### VAR INITIATERS
|
|
|
|
|
|
## FILS THE SPACES LIST WITH AMOUNTS OF SPACES WE WILL WANT TO SPLIT FROM.
|
|
for x in range(0, 100):
|
|
space = space+" "
|
|
spaces.append(space)
|
|
|
|
for i in reversed(spaces):
|
|
spacesRev.append(i)
|
|
|
|
### FUNCTIONS
|
|
|
|
##SPLITTER
|
|
#takes two arguments, the string to split and the array to append the part it split. Checks for any amount of spaces longer then 1 space
|
|
|
|
def splitter(string, array):
|
|
global spacesRev #get all spaces for comparison
|
|
global finishstring #get universally set finishstring
|
|
index = -1
|
|
lowestStart = -1
|
|
for space in spaces:
|
|
m = re.search(space, string)
|
|
if m is not None:
|
|
tempStart = m.start()
|
|
tempEnd = m.end()
|
|
else:
|
|
index=-1
|
|
continue
|
|
if (tempEnd > index or index == -1) and (tempStart <= lowestStart or lowestStart == -1):
|
|
index = tempStart
|
|
lowestStart = tempEnd
|
|
if index > -1:
|
|
beforeSplit = string[:index]
|
|
afterSplit = string[lowestStart:]
|
|
array.append(beforeSplit)
|
|
return afterSplit
|
|
else:
|
|
array.append(string)
|
|
return finishstring
|
|
|
|
def Folder(folder):
|
|
global Cast
|
|
for file in os.listdir(folder):
|
|
if file.endswith(".txt"):
|
|
File(folder+"/"+file)
|
|
|
|
def File(file):
|
|
global Cast
|
|
with open(file, "r") as infile:
|
|
fileNaam=infile.read().replace('\n', ' ')
|
|
fileNaam = fileNaam.replace('\t', ' ')
|
|
x = 0
|
|
while fileNaam != finishstring:
|
|
fileNaam = splitter(fileNaam, Cast)
|
|
if(x < len(fileNaam)) and x is not 0:
|
|
break
|
|
x = len(fileNaam)
|
|
print('only ' + str(x) + " characters left from file " + file + ", rest is being written to Output/" + file + '.json')
|
|
with open('Output/'+file+'.json', 'w+') as outfile:
|
|
json.dump(Cast, outfile)
|
|
if "CAST" in Cast:
|
|
CheckCast = Cast.index("CAST")
|
|
tempAr =Cast[CheckCast:]
|
|
if "STAR TREK: THE NEXT GENERATION" in tempAr:
|
|
endOfCast = tempAr.index("STAR TREK: THE NEXT GENERATION")
|
|
else:
|
|
endOfCast = -1
|
|
else:
|
|
CheckCast = -1
|
|
endOfCast = -1
|
|
|
|
if (CheckCast is not -1) and (endOfCast is not -1):
|
|
print(CheckCast)
|
|
print(endOfCast)
|
|
with open('Output/'+file+'-Casts.json', 'w+') as outfile:
|
|
temp = Cast[CheckCast+1:CheckCast+endOfCast]
|
|
print(temp)
|
|
json.dump(temp, outfile)
|
|
Cast = []
|
|
|
|
|
|
def Main():
|
|
if len(sys.argv) > 1:
|
|
if (os.path.isdir(sys.argv[1])):
|
|
print("folder")
|
|
Folder(sys.argv[1])
|
|
if (os.path.isfile(sys.argv[1])):
|
|
print("file")
|
|
File(sys.argv[1])
|
|
else:
|
|
print("Invalid file")
|
|
input()
|
|
else:
|
|
print('no argument')
|
|
input()
|
|
|
|
|
|
|
|
Main()
|