1 files changed, 20 insertions, 7 deletions
diff --git a/wow.py b/wow.py
index a9860cf..de563e8 100755
--- a/wow.py
+++ b/wow.py
@@ -1,8 +1,10 @@
 #!/usr/bin/python
 #Words Of Wisdom
-# output some random text from my journal
+# output some random text from my journals and writings
 
 import os
+import sys
+import time
 import filetype #for filetype (extension and mime type)
 import chardet #for getting encoding
 import subprocess
@@ -19,23 +21,29 @@ def addSampleFileIfTxt(filepath):
         ftype = filetype.guess(filepath)
         if v: print('filetype: {}'.format(str(ftype)))
         if v: print('encoding: {}'.format(enc))
+        if ftype != None:
+            if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java']: #don't want code in the sample data
+                return
+            if ftype.extension == 'odt': 
+                if v: print('converting odt: {}'.format(filepath))
+                subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
+                total_text += str(subproc.stdout)
         if enc in ['ascii','utf-8']:
             if v: print(fb)
-            total_text += str(fb)
-        elif ftype.extension == 'odt':
-            total_text += subprocess.run(['odt2txt', filepath]).stdout
+            total_text += str(fb, encoding='utf-8')            
+            
         else:
             if v: print('{} is not txt')
     else: 
             if v: print('not file')
 
 
-
+#paths=['/home/thomas/doc/fiction']
 paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
 samplefiles=[] #the individual files we want to grab text from
 
-v=True #verbose 
-
+v=False #verbose 
+tStart = time.time()
 for p in paths:
         if v: print('path {}'.format(p))
         if os.path.isdir(p):
@@ -46,6 +54,11 @@ for p in paths:
         else:
                addSampleFileIfTxt(p)
 
+#now we have all of our files of interest, so pick a random one
+
+tEnd = time.time()
+tDuration = tEnd - tStart
+print('report generated in {} seconds, from paths {}'.format(tDuration, str(paths)))
 print(total_text)
 #for f in samplefiles:
 #    print(f)