cpython/Demo/scripts/newslist.py

366 lines
11 KiB
Python
Raw Normal View History

#! /usr/bin/env python
1994-05-27 10:32:41 -03:00
#######################################################################
# Newslist $Revision$
#
# Syntax:
# newslist [ -a ]
#
# This is a program to create a directory full of HTML pages
1994-05-27 10:32:41 -03:00
# which between them contain links to all the newsgroups available
# on your server.
#
# The -a option causes a complete list of all groups to be read from
1994-05-27 10:32:41 -03:00
# the server rather than just the ones which have appeared since last
# execution. This recreates the local list from scratch. Use this on
1994-05-27 10:33:17 -03:00
# the first invocation of the program, and from time to time thereafter.
# When new groups are first created they may appear on your server as
1994-05-27 10:33:17 -03:00
# empty groups. By default, empty groups are ignored by the -a option.
# However, these new groups will not be created again, and so will not
# appear in the server's list of 'new groups' at a later date. Hence they
# won't appear until you do a '-a' after some articles have appeared.
#
1994-05-27 10:33:17 -03:00
# I should really keep a list of ignored empty groups and re-check them
# for articles on every run, but I haven't got around to it yet.
1994-05-27 10:32:41 -03:00
#
# This assumes an NNTP news feed.
#
# Feel free to copy, distribute and modify this code for
# non-commercial use. If you make any useful modifications, let me
1994-05-27 10:32:41 -03:00
# know!
#
# (c) Quentin Stafford-Fraser 1994
# fraser@europarc.xerox.com qs101@cl.cam.ac.uk
# #
#######################################################################
import sys,nntplib, string, marshal, time, os, posix, string
#######################################################################
# Check these variables before running!                               #

# Top directory.
# Filenames which don't start with / are taken as relative to this.
topdir = '/anfs/qsbigdisc/web/html/newspage'

# The name of your NNTP host, e.g.
#    newshost = 'nntp-serv.cl.cam.ac.uk'
# or, to read it from the NNTPSERVER environment variable:
#    newshost = posix.environ['NNTPSERVER']
newshost = 'nntp-serv.cl.cam.ac.uk'

# Filename of the local cache of the newsgroup list.
treefile = 'grouptree'

# Filename of the newsgroup descriptions.
# A suitable one was found at ftp.uu.net in /uunet-info/newgroups.gz
# Set this to '' if you don't wish to use one.
descfile = 'newsgroups'

# Directory in which the HTML pages are created, e.g.
#    pagedir = '/usr/local/lib/html/newspage'
#    pagedir = 'pages'
pagedir = topdir

# The HTML prefix which refers to that directory, e.g.
#    httppref = '/newspage/'
# or leave blank for relative links between pages (recommended):
#    httppref = ''
httppref = ''

# Name of the 'root' news page in this directory.
# A .html suffix will be added.
rootpage = 'root'

# Set skipempty to 0 if you wish to see links to empty groups as well.
# Only affects the -a option.
skipempty = 1

# pagelinkicon can contain HTML to put an icon after links to further
# pages.  This helps to make important links stand out.
# Set to '' if not wanted; '...' is quite a good one.
pagelinkicon = '... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# sublistsize is the maximum number of items that will appear as an
# indented sub-list before the whole thing is moved onto a separate
# page.  The smaller it is, the more pages you get, but the shorter
# each one will be.
sublistsize = 4

# That should be all.                                                 #
#######################################################################
# Optional overrides: the first '.newslistrc.py' found in the current
# directory or in the user's home directory is executed in this module's
# namespace, so it can reassign any of the settings above.
# os.path.expanduser('~') is used instead of os.environ['HOME'] so that a
# missing HOME variable does not abort the program with a KeyError.
# NOTE(review): the rc file is run with exec(); a '.newslistrc.py' in the
# current directory is therefore trusted code.
for dir in os.curdir, os.path.expanduser('~'):
    rcfile = os.path.join(dir, '.newslistrc.py')
    if os.path.exists(rcfile):
        print(rcfile)
        # Read via 'with' so the file handle is closed before exec runs.
        with open(rcfile) as rcsource:
            exec(rcsource.read())
        break
1994-05-27 10:32:41 -03:00
from nntplib import NNTP
from stat import *
# Version text shown in the page footer: the RCS keyword string with every
# whitespace-separated token containing '$' removed (so it is empty while
# the keyword is unexpanded).  str methods are used because the string
# module's split()/join() functions do not exist in Python 3.
rcsrev = '$Revision$'
rcsrev = ' '.join(s for s in rcsrev.split() if '$' not in s)

# Maps newsgroup name -> description; (re)filled by readdesc().
desc = {}

# Make (possibly) relative filenames into absolute ones
treefile = os.path.join(topdir, treefile)
# Leave '' untouched: readdesc() treats '' as "no description file", and
# joining it to topdir would turn it into a directory path.
if descfile:
    descfile = os.path.join(topdir, descfile)
# The joined directory must be stored back into pagedir, which is the name
# the page-writing code reads; 'page' is kept as an alias because the
# original code assigned the result to that name only.
pagedir = page = os.path.join(topdir, pagedir)
# First the bits for creating trees ---------------------------
# Addtotree creates/augments a tree from a list of group names
def addtotree(tree, groups):
    """Add every newsgroup name in *groups* to the nested dict *tree*.

    Each name is split on '.' and the list of components is passed to
    makeleaf(), which updates *tree* in place.  Nothing is returned.
    """
    print('Updating tree...')
    for group in groups:
        # str.split replaces string.splitfields, which Python 3 removed.
        makeleaf(tree, group.split('.'))
1994-05-27 10:32:41 -03:00
# Makeleaf makes a leaf and the branch leading to it if necessary
def makeleaf(tree, path):
    """Create the branch named by *path* in *tree*, ending in a leaf.

    *path* is a non-empty sequence of name components.  A nested dict is
    created for each component that is missing, and the dict reached by the
    last component gets the marker entry '.' -> '.'.  *tree* is modified in
    place.
    """
    head = path[0]
    remainder = path[1:]
    branch = tree.setdefault(head, {})
    if remainder:
        makeleaf(branch, remainder)
    else:
        # Last component: mark this node as an actual newsgroup.
        branch['.'] = '.'
1994-05-27 10:32:41 -03:00
# Then the bits for outputting trees as pages ----------------
1994-05-27 10:32:41 -03:00
# Createpage creates an HTML file named <root>.html containing links
# to those groups beginning with <root>.
def createpage(root, tree, p):
    """Write the HTML page '<root>.html' in pagedir for the groups in *tree*.

    root -- page name without the '.html' suffix; when it equals rootpage
            the title carries no ' under ...' detail.
    tree -- nested dict of groups, rendered by printtree().
    p    -- dotted path prefix handed through to printtree().
    """
    filename = os.path.join(pagedir, root + '.html')
    if root == rootpage:
        detail = ''
    else:
        detail = ' under ' + root
    # 'with' closes the page even if printtree() raises part-way through;
    # the original left the file open in that case.
    with open(filename, 'w') as f:
        f.write('<TITLE>Newsgroups available' + detail + '</TITLE>\n')
        f.write('<H1>Newsgroups available' + detail + '</H1>\n')
        f.write('<A HREF="' + httppref + rootpage + '.html">Back to top level</A><P>\n')
        # Rendering restarts at indent 0 for every page.
        printtree(f, tree, 0, p)
        f.write('<I>This page automatically created by \'newslist\' v. ' + rcsrev + '.')
        f.write(time.ctime(time.time()) + '</I><P>')
1994-05-27 10:32:41 -03:00
# Printtree prints the groups as a bulleted list. Groups with
# more than <sublistsize> subgroups will be put on a separate page.
# Other sets of subgroups are just indented.
def printtree(f, tree, indent, p):
    """Write the groups in *tree* to *f* as an HTML bulleted list.

    f      -- object with a write() method receiving the HTML.
    tree   -- nested dict; the key '.' marks the node itself as a group.
    indent -- nesting depth on the current page (0 at the top of a page).
    p      -- dotted path of this node with a leading '.', so p[1:] is the
              group/hierarchy name.

    A node below the top level with more than sublistsize entries is moved
    to its own page via createpage() and only linked from here.

    NOTE(review): descriptions from desc are written verbatim, without any
    HTML escaping.
    """
    global desc
    name = p[1:]
    count = len(tree)

    if indent > 0 and count > sublistsize:
        # Too many entries: link to a separate page and build that page.
        f.write('<LI><B><A HREF="'+httppref+name+'.html">')
        f.write(name+'.*')
        f.write('</A></B>'+pagelinkicon+'\n')
        createpage(name, tree, p)
        return

    keys = sorted(tree.keys())
    nested = count > 1

    if nested:
        if indent > 0:
            # Open a sub-list under this hierarchy name.
            f.write('<LI>'+name+'\n<UL>')
        else:
            # Open the main list of the page.
            f.write('<UL>')
        indent = indent + 1

    for key in keys:
        if key != '.':
            # A deeper hierarchy level.
            printtree(f, tree[key], indent, p+'.'+key)
            continue
        # The node itself is a newsgroup.
        f.write('<LI><A HREF="news:' + name + '">'+ name + '</A> ')
        if name in desc:
            f.write(' <I>'+desc[name]+'</I>\n')
        else:
            f.write('\n')

    if nested:
        f.write('\n</UL>')
1994-05-27 10:32:41 -03:00
# Reading descriptions file ---------------------------------------
# This returns an array mapping group name to its description
1994-05-27 10:33:17 -03:00
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global desc.

    desc is always reset to an empty dict first.  Each line is split on
    whitespace: the first word is the group name and the remaining words,
    joined by single spaces, are its description.  Blank lines and
    descriptions of at most one character are skipped.

    If *descfile* is '' nothing is read; if it cannot be opened a message
    is printed and desc stays empty.
    """
    global desc
    desc = {}

    if descfile == '':
        return

    try:
        d = open(descfile, 'r')
    except IOError:
        print('Failed to open description file ' + descfile)
        return
    print('Reading descriptions...')

    # 'with' closes the file (the original never did); str methods replace
    # string.split/string.join, which Python 3 removed.
    with d:
        for line in d:
            bits = line.split()
            if not bits:
                continue
            dsc = ' '.join(bits[1:])
            if len(dsc) > 1:
                desc[bits[0]] = dsc
1994-05-27 10:32:41 -03:00
1994-05-27 10:33:17 -03:00
# Check that output directory exists, ------------------------------
# and offer to create it if not
1994-05-27 10:32:41 -03:00
1994-05-27 10:33:17 -03:00
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it does not, the user is asked on stdin whether to create it.  Any
    answer not starting with 'y' (including end-of-file) exits with status
    1, as does a failure to create the directory.
    """
    if os.path.isdir(pagedir):
        return

    print('Directory '+pagedir+' does not exist.')
    print('Shall I create it for you? (y/n)')
    # startswith() also handles EOF, where readline() returns '' and the
    # original readline()[0] raised IndexError.
    if not sys.stdin.readline().startswith('y'):
        print('OK. Exiting.')
        sys.exit(1)

    try:
        os.mkdir(pagedir, 0o777)
    except OSError:
        # Only filesystem errors are expected here; a bare except would
        # also swallow KeyboardInterrupt.
        print('Sorry - failed!')
        sys.exit(1)
1994-05-27 10:32:41 -03:00
1994-05-27 10:33:17 -03:00
# Read and write current local tree ----------------------------------
1994-05-27 10:32:41 -03:00
1994-05-27 10:33:17 -03:00
def readlocallist(treefile):
    """Load the cached group tree from *treefile*.

    Returns (tree, treedate): tree is the object unmarshalled from the
    file (an empty dict if the file cannot be opened) and treedate is the
    file's modification date in local time, formatted 'YYMMDD'.

    Exits with status 1 if *treefile* cannot be stat'ed.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        treetime = time.localtime(os.stat(treefile).st_mtime)
    except OSError:
        print('\n*** Failed to open local group cache '+treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treedate = '%02d%02d%02d' % (treetime[0] % 100, treetime[1], treetime[2])
    try:
        # marshal needs a binary-mode file; a text-mode file makes
        # marshal.load() fail under Python 3.
        with open(treefile, 'rb') as dump:
            tree = marshal.load(dump)
    except IOError:
        print('Cannot open local group list ' + treefile)
    return (tree, treedate)
1994-05-27 10:33:17 -03:00
def writelocallist(treefile, tree):
    """Save *tree* to *treefile* with marshal.

    Prints a confirmation on success.  If the file cannot be written (or
    *tree* cannot be marshalled) an explanation is printed and the program
    exits with status 1.
    """
    try:
        # marshal writes bytes, so the file must be opened in binary mode;
        # in text mode every save failed under Python 3.
        with open(treefile, 'wb') as dump:
            marshal.dump(tree, dump)
    except (OSError, ValueError):
        print('Sorry - failed to write to local group cache '+treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
    print('Saved list to '+treefile+'\n')
1994-05-27 10:33:17 -03:00
# Return list of all groups on server -----------------------------
def getallgroups(server):
    """Return the names of all groups reported by server.list().

    *server* must provide list() returning (response, entries); each entry
    is indexed as entry[0] (name), entry[1] and entry[2] (article numbers).
    NOTE(review): per the nntplib documentation these are (group, last,
    first, flag) -- confirm against the server class actually used.

    When skipempty is true, groups whose entry[1] is numerically below
    entry[2] are treated as empty: they are printed and left out.
    """
    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skipempty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        # str.split/int replace string.split/string.atoi (gone in Python 3).
        grpname = entry[0].split()[0]
        if skipempty and int(entry[1]) < int(entry[2]):
            print(grpname+' ', end=' ')
        else:
            groups.append(grpname)
    print('\n')
    if skipempty:
        print('(End of empty groups)')
    return groups
1994-05-27 10:33:17 -03:00
# Return list of new groups on server -----------------------------
def getnewgroups(server, treedate):
    """Return the names of groups created on *server* since *treedate*.

    server   -- object providing newgroups(date) returning
                (response, entries).
    treedate -- date string in 'YYMMDD' form.

    Python 3's NNTP.newgroups() takes one date/datetime argument, so the
    string is converted first; the original two-string call raises
    TypeError there.  The time of day is kept at 00:00:01 to match the
    '000001' the original passed.  Entries may be tuples whose first item
    is the group name, or plain response lines starting with the name.
    """
    import datetime  # local import: only needed by this function

    print('Getting list of new groups since start of '+treedate+'...', end=' ')
    since = datetime.datetime.strptime(treedate, '%y%m%d').replace(second=1)
    info = server.newgroups(since)[1]
    print('got %d.' % len(info))
    print('Processing...', end=' ')
    groups = []
    for entry in info:
        line = entry if isinstance(entry, str) else entry[0]
        groups.append(line.split()[0])
    print('Done')
    return groups
1994-05-27 10:33:17 -03:00
# Now the main program --------------------------------------------
1994-05-27 10:32:41 -03:00
1994-05-27 10:33:17 -03:00
def main():
    """Build the HTML newsgroup pages.

    Steps: make sure the output directory exists, connect to newshost,
    obtain the group list, save the updated tree, read the descriptions
    and write the pages starting from rootpage.

    With '-a' as the first argument and a working connection the complete
    group list is fetched and the tree is rebuilt from scratch.  Otherwise
    the local cache is read and, if connected, groups created since the
    cache was last modified are added.  If the connection fails, only the
    local cache is used and it is not rewritten.
    """
    tree = {}

    # Check that the output directory exists
    checkopdir(pagedir)

    try:
        print('Connecting to '+newshost+'...')
        s = NNTP(newshost)
        connected = 1
    except (nntplib.error_temp, nntplib.error_perm, OSError) as x:
        # OSError covers socket-level failures (unknown host, refused
        # connection), which would otherwise skip the fallback below.
        print('Error connecting to host:', x)
        print('I\'ll try to use just the local list.')
        connected = 0

    # If -a is specified, read the full list of groups from server
    if connected and len(sys.argv) > 1 and sys.argv[1] == '-a':
        groups = getallgroups(s)
    # Otherwise just read the local file and then add
    # groups created since local file last modified.
    else:
        (tree, treedate) = readlocallist(treefile)
        if connected:
            groups = getnewgroups(s, treedate)

    if connected:
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')
1994-05-27 10:32:41 -03:00
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
1994-05-27 10:32:41 -03:00
# That's all folks
######################################################################