diff options
| -rwxr-xr-x | 4chan_search/query.py | 4 |
| -rwxr-xr-x | pass2txt.sh | 2 |
| -rwxr-xr-x | wow2.py | 28 |
3 files changed, 29 insertions, 5 deletions
diff --git a/4chan_search/query.py b/4chan_search/query.py
index 7863826..89a87d3 100755
--- a/4chan_search/query.py
+++ b/4chan_search/query.py
@@ -15,7 +15,7 @@ def processCatalog(catalog, b):
                 continue
             url = "https://boards.4channel.org/"+b+"/thread/"+str(catalog[i]['threads'][j]['no'])
             if wod == "*" or wod == "" or wod.lower() in catalog[i]['threads'][j]['com'].lower():
-                results_url.append(url)
+                results_url.append((url, catalog[i]['threads'][j]['last_modified']))
                 results_content.append(catalog[i]['threads'][j]['com'])
                 for imgurl in pull4chImgs(url):
                     results_img.append(imgurl)
@@ -26,7 +26,7 @@ def processCatalog(catalog, b):
                 if not 'com' in r:
                     continue
                 if wod.lower() in r['com'].lower():
-                    results_url.append(url+"#p"+str(catalog[i]['threads'][j]['last_replies'][k]['no']))
+                    results_url.append((url+"#p"+str(catalog[i]['threads'][j]['last_replies'][k]['no'],catalog[i]['threads'][j]['last_replies'][k]['last_modified'])))
                     results_content.append(catalog[i]['threads'][j]['last_replies'][k]['com'])
                     #imgs were already retrieved from OP grab
 
diff --git a/pass2txt.sh b/pass2txt.sh
index 1b51f25..787cc8d 100755
--- a/pass2txt.sh
+++ b/pass2txt.sh
@@ -15,7 +15,7 @@ echo "and writing to "${outfile}
 for f in `find ${passdb} | grep \.gpg`; do
     #echo $f >> ${outfile}
     #pass `echo $f | sed 's/\.gpg//g'` >> ${outfile}
-    pswd=`gpg --quiet -d ${f} >> ${outfile} `
+    pswd=`gpg --quiet -d ${f}`
     echo "${f} : ${pswd}" >> ${outfile}
 done
 
diff --git a/wow2.py b/wow2.py
--- a/wow2.py
+++ b/wow2.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
 #Words Of Wisdom
-# output some random text from my journals and writings
+# output some random text from some given collection of files,
+# - primarily used to grab some random "words of wisdom" from my journals and writings
 
 import os
 import sys
@@ -9,9 +10,12 @@ import filetype #for filetype (extension and mime type)
 import chardet #for getting encoding
 import subprocess
 import random
+import getopt
 
 total_text = ''
 
+
+
 #can we parse text out of this file or is it a libreoffice file? if so, return the contents
 def attemptReadSampleFile(filepath):
     if v: print('checking {}'.format(filepath))
@@ -26,6 +30,9 @@ def attemptReadSampleFile(filepath):
         if ftype.extension == 'odt' and filepath[len(filepath)-1] != '#': #openoffice doc and not a lock file
             subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
             return subproc.stdout
+        if ftype.extension == 'txt':
+            print('AYO!')
+            return str(f.read(), encoding='utf-8')
         else:
             fb = f.read() #file data (bytes) to detect encoding
             enc = str(chardet.detect(fb)['encoding'])
@@ -42,6 +49,22 @@ def attemptReadSampleFile(filepath):
 #paths=['/home/thomas/doc/fiction']
 paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
 samplefiles=[] #the paths of the individual files from which we want to grab text
+matchpattern='' #if we want to filter the files by some text pattern
+time_min = -1 #threshold time. dont use files that are older
+
+ags,vals = getopt.getopt(sys.argv[1:], 'h:e:')
+for a,v in ags:
+    if a == '-h':
+        print('TODO usage info')
+    if a == '-e': #selected file must match some text pattern
+        matchpattern = v #TODO
+    if a == '-t':
+        time_min = int(v) #TODO
+
+if len(sys.argv) > 1:
+    paths = sys.argv[1:]
+
+
 
 v=False #verbose
 tStart = time.time()
@@ -68,7 +91,8 @@ while t == None:
     t = attemptReadSampleFile(samplefiles[fi])
 
 print()
-print('{} : {}'.format(samplefiles[fi], t))
+mt = time.ctime(os.path.getmtime(samplefiles[fi]))
+print('{} ;\n last modified {} :\n {}'.format(samplefiles[fi], mt, t))
 
 lines = t.splitlines()
 li = random.randint(0, len(t)) #line index
