diff options
| -rwxr-xr-x | wow.py | 27 |
1 file changed, 20 insertions, 7 deletions
@@ -1,8 +1,10 @@
 #!/usr/bin/python
 #Words Of Wisdom
-# output some random text from my journal
+# output some random text from my journals and writings
 import os
+import sys
+import time
 import filetype #for filetype (extension and mime type)
 import chardet #for getting encoding
 import subprocess
@@ -19,23 +21,29 @@ def addSampleFileIfTxt(filepath):
         ftype = filetype.guess(filepath)
         if v: print('filetype: {}'.format(str(ftype)))
         if v: print('encoding: {}'.format(enc))
+        if ftype != None:
+            if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java']: #don't want code in the sample data
+                return
+            if ftype.extension == 'odt':
+                if v: print('converting odt: {}'.format(filepath))
+                subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
+                total_text += str(subproc.stdout)
         if enc in ['ascii','utf-8']:
             if v: print(fb)
-            total_text += str(fb)
-        elif ftype.extension == 'odt':
-            total_text += subprocess.run(['odt2txt', filepath]).stdout
+            total_text += str(fb, encoding='utf-8')
+
         else:
             if v: print('{} is not txt')
     else:
         if v: print('not file')
-
+#paths=['/home/thomas/doc/fiction']
 paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
 samplefiles=[] #the individual files we want to grab text from
-v=True #verbose
-
+v=False #verbose
+tStart = time.time()
 for p in paths:
     if v: print('path {}'.format(p))
     if os.path.isdir(p):
@@ -46,6 +54,11 @@ for p in paths:
     else:
         addSampleFileIfTxt(p)
+#now we have all of our files of interest. so pick a random one
+
+tEnd = time.time()
+tDuration = tEnd - tStart
+print('report generated in {} seconds, from paths {}'.format(tDuration, str(paths)))
 print(total_text)
 #for f in samplefiles:
 # print(f)
