--- /dev/null
+#!/usr/bin/python
+
+import curses
+import locale
+import codecs
+
+# Adopt the user's locale so text handed to curses is encoded the way the
+# terminal expects.
+locale.setlocale(locale.LC_ALL, '')
+code = locale.getpreferredencoding()
+
+# Unicode code points of the superscript digits, indexed by digit 0-9.
+# 1, 2 and 3 are U+00B9, U+00B2 and U+00B3; the others are U+2070 and
+# U+2074..U+2079.
+_SUPERSCRIPT_CODEPOINTS = (
+    8304, 185, 178, 179, 8308, 8309, 8310, 8311, 8312, 8313,
+)
+
+# Maps each digit character to the byte string drawn for it in the grid.
+try:
+    superscript_numbers = dict(
+        (str(digit), unichr(codepoint).encode(code))
+        for digit, codepoint in enumerate(_SUPERSCRIPT_CODEPOINTS))
+except (UnicodeError, LookupError):
+    # The locale's encoding cannot represent the superscripts (UnicodeError)
+    # or names a codec Python does not know (LookupError): fall back to plain
+    # ASCII digits. Anything else, e.g. KeyboardInterrupt, still propagates.
+    superscript_numbers = dict(
+        (str(digit), str(digit)) for digit in range(10))
+
+# Read the whole puzzle up front; close the file even if reading fails.
+_crossword_file = codecs.open("g2-20090701.txt", "r", "utf-8")
+try:
+    crossworddata = _crossword_file.read()
+finally:
+    _crossword_file.close()
+
+def parsecrossword(crossworddata):
+    """Parse the plain-text crossword format into a dictionary.
+
+    The text is split into sections by lines reading exactly ``GRID``,
+    ``ACROSS`` or ``DOWN``; anything before the first header is ignored, as
+    are empty lines. Grid rows are kept verbatim (``x`` is a blocked square,
+    any other character an open one). A clue line is a number followed by
+    the clue text; runs of whitespace in the text collapse to one space.
+
+    Returns a dict with the keys:
+      "grid"                  list of row strings
+      "across" / "down"       {clue number (int): clue text}
+      "grid_questions_start"  one list per grid row giving, for each square,
+                              the clue number that starts there or 0
+      "grid_questions_end"    always an empty list (never filled in here)
+
+    Squares past the end of a short row count as outside the grid, so an
+    empty, single-row, single-column or ragged grid no longer raises
+    IndexError.
+
+    Raises ValueError when a clue line does not begin with an integer.
+    """
+    crossword = {"grid": [], "across": {}, "down": {},
+                 "grid_questions_start": [], "grid_questions_end": []}
+    section = None
+
+    for line in crossworddata.split("\n"):
+        # Tolerate CRLF input. No other whitespace is stripped because a
+        # space is a meaningful (open) square inside the grid.
+        line = line.rstrip("\r")
+        if line in ("GRID", "ACROSS", "DOWN"):
+            section = line
+        elif line == "" or section is None:
+            continue
+        elif section == "GRID":
+            crossword["grid"].append(line)
+        else:
+            parts = line.split()
+            if not parts:
+                # Whitespace-only line: nothing to record.
+                continue
+            # The section name lower-cased is the matching result key.
+            crossword[section.lower()][int(parts[0])] = " ".join(parts[1:])
+
+    grid = crossword["grid"]
+    num_rows = len(grid)
+    # Size every numbering row by the widest grid row so each square of each
+    # row has an entry even when the rows differ in length.
+    num_cols = max([len(row) for row in grid] or [0])
+
+    def is_open(row, col):
+        """Return True if (row, col) is inside the grid and not blocked."""
+        return (0 <= row < num_rows and 0 <= col < len(grid[row])
+                and grid[row][col] != "x")
+
+    # Squares are numbered left to right, top to bottom. A square starts a
+    # clue when it is open, the square before it (left for across, above for
+    # down) is blocked or outside the grid, and the square after it is open.
+    current_clue_number = 1
+    for row in range(num_rows):
+        numbers = []
+        for col in range(num_cols):
+            starts_across = (not is_open(row, col - 1)
+                             and is_open(row, col + 1))
+            starts_down = (not is_open(row - 1, col)
+                           and is_open(row + 1, col))
+            if is_open(row, col) and (starts_across or starts_down):
+                numbers.append(current_clue_number)
+                current_clue_number += 1
+            else:
+                numbers.append(0)
+        crossword["grid_questions_start"].append(numbers)
+
+    return crossword
+
+def crossword(stdscr, crossworddata):
+    """Draw the crossword grid on a curses window and wait for 'q'.
+
+    stdscr is the curses window to draw on (the script hands this function
+    to curses.wrapper()); crossworddata is the raw puzzle text, which is
+    parsed with parsecrossword().
+
+    Each square is three columns wide and one row high with box-drawing
+    lines between squares: grid square (row, col) occupies screen row
+    2*row + 1 and screen columns 4*col + 1 .. 4*col + 3. Blocked squares
+    ("x") are filled with ACS_BLOCK; a square where a clue starts shows its
+    number using the module-level superscript_numbers table.
+
+    Raises ValueError if the puzzle has no grid rows, and curses.error if
+    the window is too small to hold the grid.
+    """
+    # Called "puzzle" so the local no longer shadows this function's name.
+    puzzle = parsecrossword(crossworddata)
+    grid = puzzle["grid"]
+    if not grid:
+        raise ValueError("crossword data contains no GRID rows")
+    # The top and bottom borders are sized from the first row.
+    grid_length = len(grid[0])
+
+    # Fail with a clear message instead of an opaque "addch() returned ERR"
+    # part-way through drawing. curses also raises when the bottom-right
+    # cell of the window is written, so an exact fit is rejected as well.
+    height = 2 * len(grid) + 1
+    width = 4 * max([len(line) for line in grid]) + 1
+    max_y, max_x = stdscr.getmaxyx()
+    if (height > max_y or width > max_x
+            or (height == max_y and width == max_x)):
+        raise curses.error(
+            "terminal too small for crossword: need more than %dx%d, have "
+            "%dx%d (columns x rows)" % (width, height, max_x, max_y))
+
+    def draw_border(y, left_corner, junction, right_corner):
+        """Draw the top or bottom edge of the grid on screen row y."""
+        stdscr.addch(y, 0, left_corner)
+        for cell in range(grid_length):
+            x = 4 * cell + 1
+            for offset in range(3):
+                stdscr.addch(y, x + offset, curses.ACS_HLINE)
+            stdscr.addch(y, x + 3, junction)
+        # The last junction sits where the corner belongs; overwrite it.
+        stdscr.addch(y, 4 * grid_length, right_corner)
+
+    draw_border(0, curses.ACS_ULCORNER, curses.ACS_TTEE, curses.ACS_URCORNER)
+
+    for row_index, line in enumerate(grid):
+        y = 2 * row_index + 1
+        for col_index, c in enumerate(line):
+            x = 4 * col_index
+            # Left wall of this square, and the separator line beneath it.
+            stdscr.addch(y, x, curses.ACS_VLINE)
+            if x > 0:
+                stdscr.addch(y + 1, x, curses.ACS_PLUS)
+            else:
+                stdscr.addch(y + 1, x, curses.ACS_LTEE)
+            for offset in range(1, 4):
+                stdscr.addch(y + 1, x + offset, curses.ACS_HLINE)
+            if c == "x":
+                for offset in range(1, 4):
+                    stdscr.addch(y, x + offset, curses.ACS_BLOCK)
+            else:
+                number = puzzle["grid_questions_start"][row_index][col_index]
+                if number > 0:
+                    # Table entries are already encoded for the terminal,
+                    # so the per-digit byte strings are joined as they are.
+                    label = "".join(
+                        [superscript_numbers[digit] for digit in str(number)])
+                    stdscr.addstr(y, x + 1, label)
+        # Right wall of the row and the right end of its separator line.
+        right_edge = 4 * len(line)
+        stdscr.addch(y, right_edge, curses.ACS_VLINE)
+        stdscr.addch(y + 1, right_edge, curses.ACS_RTEE)
+
+    # The separator drawn under the last row is replaced by the bottom border.
+    draw_border(2 * len(grid), curses.ACS_LLCORNER, curses.ACS_BTEE,
+                curses.ACS_LRCORNER)
+
+    # Keep the grid on screen until the user presses q.
+    while stdscr.getch() != ord('q'):
+        pass
+
+# Show the interactive grid. curses.wrapper() initialises the screen, calls
+# crossword(stdscr, crossworddata) and restores the terminal afterwards.
+curses.wrapper(crossword, crossworddata)
+# Once the viewer has exited, dump the parsed structure to stdout
+# (Python 2 print statement).
+print parsecrossword(crossworddata)
--- /dev/null
+#!/bin/bash
+
+# Fetch the quick-crossword page source and keep only lines that match both
+# /FORM action/ and /printer/, rewritten to http://www.guardian.co.uk followed
+# by the first double-quoted value on the line; every other line is deleted.
+# The resulting URL is passed to "w3m -dump", whose text rendering of that
+# page is stored in $crossword.
+crossword=$(w3m -dump_source http://www.guardian.co.uk/crossword/quick/ | sed -e '/FORM action/ { /printer/ { s#^[^"]*"\([^"]*\)".*$#http://www.guardian.co.uk\1#; p; }; d; }; d;' | xargs w3m -dump)
+# Disabled alternative: take the dump from a local crossword.txt instead
+# (presumably for testing without network access).
+#crossword=$(<crossword.txt)
+
+# Grid: the lines from one containing ┌ through the next one containing ┘.
+grid=$(echo "$crossword" | sed -e '/┌/,/┘/p; d;')
+# Clues: the lines from one containing ┘ through the next one containing
+# "[sp]", minus the first two lines and the last line of that output.
+clues=$(echo "$crossword" | sed -e '/┘/,/\[sp\]/p; d;' | tail -n +3 - | head -n -1)
+# Size of the text before "Down" on the first clue line. wc -c also counts
+# the trailing newline, hence the "- 1".
+# NOTE(review): wc -c counts bytes while the sed patterns below count
+# characters; this assumes the text before "Down" is plain ASCII - confirm.
+across_break=$(($(echo "$clues" | sed -e '1{s#Down.*$##; p; }; d;' | wc -c) - 1))
+# From line 2 on, keep only the first $across_break characters of each line.
+across=$(echo "$clues" | sed -e '2,${s#^\(.\{'$across_break','$across_break'\}\).*$#\1#; p; }; d;')
+# From line 2 on, remove the first $across_break characters of each line.
+# NOTE(review): a line shorter than $across_break matches neither
+# substitution, so it is printed unchanged into both $across and $down -
+# confirm that the dump always pads lines to the column width.
+down=$(echo "$clues" | sed -e '2,${s#^.\{'$across_break','$across_break'\}##; p; }; d;')
+
+function unformat_clues() {
+    # Join wrapped clue text so that every clue ends up on a single line.
+    #   $1  clue column text. A line that does not start with a space begins
+    #       a new clue; a line that starts with a space continues the
+    #       previous clue and is appended to it after a single space.
+    # Output: one clue per line, with no newline after the last clue.
+    #
+    # Locals keep the caller's variables (including the global $clues and
+    # $IFS) untouched.
+    local text=$1
+    local line trimmed
+    local first=1
+
+    # Read the text line by line rather than word-splitting an unquoted
+    # expansion, so characters such as * or ? in a clue are never
+    # glob-expanded into file names.
+    while IFS= read -r line; do
+        # Empty lines carry no clue text; skip them.
+        [ -z "$line" ] && continue
+
+        # Trim leading, then trailing, spaces (spaces only, not tabs).
+        trimmed=${line#"${line%%[! ]*}"}
+        trimmed=${trimmed%"${trimmed##*[! ]}"}
+
+        if [ "${line:0:1}" != " " ]; then
+            # Start of a new clue: end the previous output line first.
+            if [ "$first" -ne 1 ]; then
+                printf '\n'
+            else
+                first=0
+            fi
+            # printf never mistakes clue text for an option, unlike echo -n.
+            printf '%s' "$trimmed"
+        elif [ -n "$trimmed" ]; then
+            printf ' %s' "$trimmed"
+        fi
+    done <<< "$text"
+}
+
+function unformat_grid() {
+ # Convert the rendered grid passed as $1 into plain text, one row per line.
+ # Note that this assigns the global variable "grid".
+ grid=$1
+ # sed steps, in order:
+ #   /───/d       drop every line containing ───
+ #   s#[0-9]# #g  replace each digit with a space
+ #   s# # #g      as written, replaces a space with a space
+ #                NOTE(review): this is a no-op as it stands; the pattern
+ #                may have lost whitespace - presumably it collapsed the
+ #                spaces of one square into a single space. Confirm.
+ #   s#● *#x#g    replace ● plus any spaces that follow it with x
+ #   s#│##g       remove every │
+ echo "$grid" | sed -e '/───/d; s#[0-9]# #g; s# # #g; s#● *#x#g; s#│##g;'
+}
+
+# Convert each scraped piece into its plain-text form.
+grid_unformatted=$(unformat_grid "$grid")
+across_unformatted=$(unformat_clues "$across")
+down_unformatted=$(unformat_clues "$down")
+
+# Print one section: the header in $1 on its own line, then the body in $2.
+function emit_section() {
+    echo "$1"
+    echo "$2"
+}
+
+# Write the three sections, separated by blank lines.
+emit_section GRID "$grid_unformatted"
+echo
+emit_section ACROSS "$across_unformatted"
+echo
+emit_section DOWN "$down_unformatted"