summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x wow.py 27
1 files changed, 20 insertions, 7 deletions
diff --git a/wow.py b/wow.py
index a9860cf..de563e8 100755
--- a/wow.py
+++ b/wow.py
@@ -1,8 +1,10 @@
#!/usr/bin/python
#Words Of Wisdom
-# output some random text from my journal
+# output some random text from my journals and writings
import os
+import sys
+import time
import filetype #for filetype (extension and mime type)
import chardet #for getting encoding
import subprocess
@@ -19,23 +21,29 @@ def addSampleFileIfTxt(filepath):
ftype = filetype.guess(filepath)
if v: print('filetype: {}'.format(str(ftype)))
if v: print('encoding: {}'.format(enc))
+ if ftype != None:
+ if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java']: #don't want code in the sample data
+ return
+ if ftype.extension == 'odt':
+ if v: print('converting odt: {}'.format(filepath))
+ subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
+ total_text += str(subproc.stdout)
if enc in ['ascii','utf-8']:
if v: print(fb)
- total_text += str(fb)
- elif ftype.extension == 'odt':
- total_text += subprocess.run(['odt2txt', filepath]).stdout
+ total_text += str(fb, encoding='utf-8')
+
else:
if v: print('{} is not txt')
else:
if v: print('not file')
-
+#paths=['/home/thomas/doc/fiction']
paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
samplefiles=[] #the individual files we want to grab text from
-v=True #verbose
-
+v=False #verbose
+tStart = time.time()
for p in paths:
if v: print('path {}'.format(p))
if os.path.isdir(p):
@@ -46,6 +54,11 @@ for p in paths:
else:
addSampleFileIfTxt(p)
+#now we have all of our files of interest. so pick a random one
+
+tEnd = time.time()
+tDuration = tEnd - tStart
+print('report generated in {} seconds, from paths {}'.format(tDuration, str(paths)))
print(total_text)
#for f in samplefiles:
# print(f)