summary refs log tree commit diff
diff options
context:
space:
mode:
author thomas grothe <grothe.tr@gmail.com> 2023-11-29 23:43:15 -0600
committer thomas grothe <grothe.tr@gmail.com> 2023-11-29 23:43:15 -0600
commit 6cdf7ae3598e791a537b10885bd31c56a26b362e (patch)
tree 00ef3b62bd64f6bba311bc36fa20bcc054fa2fbd
parent 6354f6ae105f36c68124558eb45323d90b3ad992 (diff)
wow as python script
-rwxr-xr-x wow 2
-rwxr-xr-x wow.py 55
2 files changed, 56 insertions, 1 deletions
diff --git a/wow b/wow
index 8907ba9..85eb791 100755
--- a/wow
+++ b/wow
@@ -39,7 +39,7 @@ while getopts "f:l:r:v" opt; do
done
if [[ ${#files[@]} == 0 ]]; then
- files=(~/doc/j/_journal-toaug2022 ~/_poetry ~/doc/_journal_2019)
+ files=(~/doc/j/_journal-toaug2022 ~/_poetry ~/doc/_journal_2019 )
fi
if [ $V ]; then echo "Grabbing text sample from files: "${files[*]}; fi
diff --git a/wow.py b/wow.py
new file mode 100755
index 0000000..a9860cf
--- /dev/null
+++ b/wow.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+#Words Of Wisdom
+# output some random text from my journal
+
+import os
+import filetype #for filetype (extension and mime type)
+import chardet #for getting encoding
+import subprocess
+
+total_text = ''
+
+def addSampleFileIfTxt(filepath):
+    """Append the text of filepath to the global total_text; return nothing."""
+    #ascii/utf-8 files are decoded, .odt files are converted by odt2txt, anything else is
+    #skipped; progress is printed when the global v is true
+    global total_text
+    if v: print('checking {}'.format(filepath))
+    if not os.path.isfile(filepath):
+        if v: print('not file')
+        return
+    with open(filepath, 'rb') as f: #closes the handle even if read() raises
+        fb = f.read() #file data (bytes)
+    enc = str(chardet.detect(fb)['encoding'])
+    ftype = filetype.guess(filepath) #guess() gives None when the type is not recognised
+    if v: print('filetype: {}'.format(str(ftype)))
+    if v: print('encoding: {}'.format(enc))
+    if enc in ['ascii','utf-8']:
+        if v: print(fb)
+        total_text += fb.decode(enc, errors='replace') #str(fb) would append the b'...' repr
+    elif ftype is not None and ftype.extension == 'odt': #capture stdout, otherwise .stdout is None
+        total_text += subprocess.run(['odt2txt', filepath], stdout=subprocess.PIPE, universal_newlines=True).stdout
+    elif v: print('{} is not txt'.format(filepath))
+
+#the paths to scan recursively for files from which to grab text
+paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019']
+samplefiles=[] #the individual files we want to grab text from (nothing appends to it yet)
+
+v=True #verbose
+
+#hand every file found under each path to addSampleFileIfTxt, which accumulates total_text
+for p in paths:
+    if v: print('path {}'.format(p))
+    if os.path.isdir(p):
+        for root,dirs,files in os.walk(p):
+            if v: print('walk {}: {} files, {} dirs'.format(root, len(files), len(dirs)))
+            for f in files:
+                addSampleFileIfTxt(os.path.join(root, f)) #join avoids a doubled '/' when root ends with one
+    else:
+        addSampleFileIfTxt(p) #not a directory: let the helper decide whether it is a usable file
+
+print(total_text)
+
+
+
+