]> git.sommitrealweird.co.uk Git - curses-crossword.git/blob - parse-g2crossword.sh
skip to first blank in grid
[curses-crossword.git] / parse-g2crossword.sh
#!/bin/bash
#
# Download the Guardian quick crossword with w3m and carve the rendered page
# into three pieces: the grid, the across clues and the down clues.
#
# Variables set for the rest of the script:
#   crossword     - text dump of the printer-friendly crossword page
#   grid          - the box-drawn grid, from the "┌" line to the "┘" line
#   clues         - the two-column clue table, heading row first
#   across_break  - width (in characters) of the across column
#   across, down  - the left and right clue columns, one table row per line

# The index page has a FORM whose action mentions "printer"; the quoted path
# from that line, prefixed with the site root, is the page that gets dumped.
printer_url=$(w3m -dump_source http://www.guardian.co.uk/crossword/quick/ \
    | sed -e '/FORM action/ { /printer/ { s#^[^"]*"\([^"]*\)".*$#http://www.guardian.co.uk\1#; p; }; d; }; d;')
if [[ -z "$printer_url" ]]; then
    # GNU xargs runs its command once even on empty input, so without this
    # guard "w3m -dump" would be started with no URL and the script would go
    # on to parse whatever that prints.
    printf '%s: no printer-friendly crossword link found\n' "${0##*/}" >&2
    exit 1
fi
crossword=$(printf '%s\n' "$printer_url" | xargs w3m -dump)
# For offline work, load a previously saved dump instead:
#crossword=$(<crossword.txt)

# The grid is every line from the top-left corner to the bottom-right corner.
grid=$(printf '%s\n' "$crossword" | sed -n -e '/┌/,/┘/p')

# The clue table runs from the grid's last line to the line containing
# "[sp]"; the first two lines of that range and its final line are dropped.
clues=$(printf '%s\n' "$crossword" | sed -n -e '/┘/,/\[sp\]/p' | tail -n +3 | head -n -1)

# The across column is as wide as the text preceding "Down" in the heading
# row.  Counted in characters, the same unit the split below uses.
heading_row=${clues%%$'\n'*}
across_heading=${heading_row%%Down*}
across_break=${#across_heading}

# Split every row after the heading at that column.  A row shorter than the
# across column has no down text at all, so its down part is empty rather
# than a copy of the across text.
across=""
down=""
{
    IFS= read -r heading_row    # consume the heading row
    while IFS= read -r clue_row; do
        across+="${clue_row:0:across_break}"$'\n'
        down+="${clue_row:across_break}"$'\n'
    done
} <<<"$clues"
across=${across%$'\n'}
down=${down%$'\n'}
11
# Join the wrapped rows of one clue column back into one clue per line.
#
# A row whose first character is not a space starts a new clue; a row that
# starts with a space is a continuation and is appended to the current clue
# after a single space.  Leading and trailing spaces are stripped from every
# row, and rows that are empty or contain only spaces are ignored.
#
# Arguments: $1 - the clue column, rows separated by newlines
# Outputs:   the clues on stdout, separated by newlines, with no trailing
#            newline after the last clue
function unformat_clues() {
    local clue_text=$1
    local row trimmed
    local first_clue=1

    # Rows are read verbatim: no word splitting, no pathname expansion and no
    # backslash processing, so clue text such as "*" or "What?" is left alone.
    while IFS= read -r row; do
        # Empty rows carry no text and do not start a clue.
        [[ -n "$row" ]] || continue

        # Strip leading, then trailing, spaces (spaces only, not tabs).
        trimmed="${row#"${row%%[! ]*}"}"
        trimmed="${trimmed%"${trimmed##*[! ]}"}"

        if [[ "${row:0:1}" != " " ]]; then
            # New clue: end the previous one first, except before the first.
            if (( first_clue )); then
                first_clue=0
            else
                printf '\n'
            fi
            printf '%s' "$trimmed"
        elif [[ -n "$trimmed" ]]; then
            printf ' %s' "$trimmed"
        fi
    done <<<"$clue_text"
}
35
# Reduce the box-drawn crossword grid to a plain character grid.
#
# Lines containing "───" are deleted, every digit becomes a space, each run
# of three spaces is collapsed to one, "●" together with any spaces after it
# becomes "x", and the "│" separators are removed.
#
# Arguments: $1 - the grid text, rows separated by newlines
# Outputs:   the reduced grid on stdout
function unformat_grid() {
    # Local so that a direct call does not overwrite the caller's $grid.
    local drawn_grid=$1
    printf '%s\n' "$drawn_grid" | sed -e '/───/d; s#[0-9]# #g; s#   # #g; s#● *#x#g; s#│##g;'
}
40
# Convert the three raw pieces, then print them as GRID, ACROSS and DOWN
# sections separated by blank lines.
grid_unformatted=$(unformat_grid "$grid")
across_unformatted=$(unformat_clues "$across")
down_unformatted=$(unformat_clues "$down")

# Print one section: its title on a line of its own, then its body.
function print_section() {
    echo "$1"
    echo "$2"
}

print_section GRID "$grid_unformatted"
echo
print_section ACROSS "$across_unformatted"
echo
print_section DOWN "$down_unformatted"