
// File: news_1.cpp
//
// Description: This file was derived from the ..\NetMan\test_nm.cpp
//              test program file. It serves as a minimal example
//              for using the NetMan C++ class to retrieve HTML
//              documents from the Internet, and for storing data
//              on the local machine using the C++ Library class.
//              The Library class uses the full text indexing
//              class TextIndex to build complete inverted word
//              indices for the retrieved documents.  The C++
//              class HTML is used to access the structured text
//              data in the retrieved HTML documents.
//

#include <windows.h>   // required for all Windows applications
#include "resource.h"  // Windows resource IDs
#include "resrc1.h"
#include "test_nm.h"   // specific to this program

#include "NetMan.h"
#include "Library.h"
#include "HTML.h"
#include "FileDir.h"
#include "TextNews.h"
#include "RTFNews.h"

// Define test data:

//char * HTTP_server[] = {
//    "www.rns.com",           "salvador.mt.cs.cmu.edu"};       
//char * docs[] = {
//   "/cgi-bin/nikos?Windows", "/cgi-bin/pursuit.exe?query=AI+NLP"};
//const int num_docs = 2;

// The following data will be retrieved from
// the configuration file:  Robot.CFG
//
// The tables are filled while the IDM_FETCH menu command is handled
// in WndProc: entry k describes one search-robot request.

char HTTP_server[10][60];  // host name of robot k
char docs[10][128];        // document path of robot k with the text
                           // typed by the user (cpEdit) appended
int num_docs;              // number of valid entries in both tables

// Set the maximum number of Web documents to be retrieved from
// a single information source.  NOTE: be a good Internet citizen,
// and keep the following parameter set to a low value.  If you
// make this value very large (e.g., 50), you will retrieve
// documents which have a low probability of being interesting to
// you.  When I am testing this program, I set MAX_SEARCH to 5,
// and usually set it to 10 or 20 for actual use of the program.
//
// WndProc uses this value as the number of <a> tags it examines
// in each retrieved HTML file.

const int MAX_SEARCH = 10;

// Allocate a pointer to a Library object:
// NOTE(review): this file-level pointer is not assigned or read
// anywhere in the visible part of this file; the menu handlers in
// WndProc create and delete their own Library objects.
static Library    *library;

// Define a few "expert system" style rule functions
// used by instances of the C++ class HTML when searching
// for valid references to other HTML documents that are
// embedded in a retrieved page:

// Rule 1: reject any reference for which HTML::STR_FOUND reports
// "cgi-" in str.
// Returns 1 to disallow str, 0 to accept it.
int rule_1(char *str)
{
    int disallow = 0;                   // accept unless a match is reported
    if (HTML::STR_FOUND("cgi-", str))
    {
        disallow = 1;
    }
    return disallow;
}

// Rule 2: accept only references for which HTML::STR_FOUND reports
// "http:" in str.
// Returns 1 to disallow str, 0 to accept it.
int rule_2(char *str)
{
    int disallow = 1;                   // reject unless a match is reported
    if (HTML::STR_FOUND("http:", str))
    {
        disallow = 0;
    }
    return disallow;
}

// Define a rule to test the file extension of returned
// files.

// Compare two extensions ignoring ASCII letter case.
// Returns 1 when they are equal, 0 otherwise.
static int extension_equals_nocase(const char *lhs, const char *rhs)
{
    for (;; lhs++, rhs++)
    {
        char a = *lhs;
        char b = *rhs;
        if (a >= 'A' && a <= 'Z') a = (char)(a - 'A' + 'a');
        if (b >= 'A' && b <= 'Z') b = (char)(b - 'A' + 'a');
        if (a != b)    return 0;
        if (a == '\0') return 1;   // both strings ended together
    }
}

// Decide whether a document with the given file extension should be
// retrieved.
//
//   file_extension  extension without the leading dot (e.g. "gz");
//                   a null pointer is treated like an empty extension.
//
// Returns 0 for compressed/archive files (Z, gz, zip, tar) and 1 for
// everything else.  The comparison ignores letter case, so spellings
// such as "GZ" or "Zip" are rejected as well.
int extension_ok(char *file_extension)
{
    static const char *const rejected[] = { "z", "gz", "zip", "tar" };
    const int num_rejected = (int)(sizeof(rejected) / sizeof(rejected[0]));
    int i;

    if (!file_extension)
        return 1;  // nothing to reject

    for (i = 0; i < num_rejected; i++)
    {
        if (extension_equals_nocase(file_extension, rejected[i]))
            return 0;  // discard this kind of file
    }
    return 1;  // file extension is OK
}


HINSTANCE hInst;          // current instance (saved by InitInstance)
HCURSOR hSaveCursor;      // handle to current cursor; set by WndProc
                          // before a long operation and restored after
HCURSOR hHourGlass;       // handle to hourglass cursor (loaded by
                          // InitInstance)
// NOTE(review): not referenced in the visible part of this file.
static FARPROC lpfnDefaultEditBox;

// Used as the window class name (InitProgram, InitInstance):
char szApplicationName[] = "Test_NetMan";
// Used as the title of the main window (InitInstance):
char szWindowTitle[]     = "Test C++ NetMan class";

// Program entry point.
//
// Registers the window class (only when there is no previous
// instance), creates the main window and then runs the message loop.
//
// Returns FALSE when initialization fails or when GetMessage reports
// an error; otherwise returns the value passed to PostQuitMessage.
int CALLBACK WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
                     LPSTR lpCmdLine, int nCmdShow)
{
    MSG msg;
    int status;

    if (!hPrevInstance)
    {   // Initialize application:
        if (!InitProgram(hInstance))
        {
             return (FALSE);  // exit if we can not initialize
        }
    }

    if (!InitInstance(hInstance, nCmdShow))
    {
        return (FALSE);
    }

    // Process Window messages.  GetMessage returns 0 for WM_QUIT and
    // -1 on error; the error case must not be treated as "message
    // received", otherwise the loop would dispatch a stale MSG
    // forever.

    while ((status = GetMessage(&msg, NULL, 0, 0)) != 0)
    {
        if (status == -1)
        {
            return (FALSE);  // message retrieval failed
        }
        // No accelerator table is loaded; a NULL table is passed as
        // in the original code.
        if (!TranslateAccelerator (msg.hwnd, NULL, &msg))
        {
           TranslateMessage(&msg);
           DispatchMessage(&msg);
        }
    }
    return ((int) msg.wParam); // Returns the value from PostQuitMessage
}


// Fill in the window class description for the main window and
// register it under the name szApplicationName.
// Returns the result of RegisterClass (zero on failure).
BOOL InitProgram(HINSTANCE hInstance)
{
    WNDCLASS main_class;

    // Who owns the class and what it is called:
    main_class.hInstance     = hInstance;
    main_class.lpszClassName = szApplicationName;
    main_class.lpfnWndProc   = (WNDPROC)WndProc;

    // Appearance:
    main_class.style         = CS_HREDRAW | CS_VREDRAW;
    main_class.hIcon         = NULL;
    main_class.hCursor       = LoadCursor(NULL, IDC_ARROW);
    main_class.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
    main_class.lpszMenuName  = MAKEINTRESOURCE(IDR_GENERIC);

    // No extra per-class or per-window bytes:
    main_class.cbClsExtra    = 0;
    main_class.cbWndExtra    = 0;

    return (RegisterClass(&main_class));
}

// Per-instance start-up: remember the instance handle, load the
// hourglass cursor, then create and show the main window.
// Returns TRUE on success, FALSE when the window cannot be created.
BOOL InitInstance(HINSTANCE hInstance, int nCmdShow)
{
    // Keep the instance handle for later dialog creation:
    hInst = hInstance;

    // Hourglass cursor (from MSVC2.0 cursor.c example program):
    hHourGlass = LoadCursor(NULL, IDC_WAIT);

    // Create the main window with default position and size:
    HWND main_window = CreateWindow(szApplicationName,
                                    szWindowTitle,
                                    WS_OVERLAPPEDWINDOW,
                                    CW_USEDEFAULT, 0,
                                    CW_USEDEFAULT, 0,
                                    NULL,
                                    NULL,
                                    hInstance,
                                    NULL);
    if (main_window == NULL)
    {
        return (FALSE);   // error condition
    }

    ShowWindow(main_window, nCmdShow);
    UpdateWindow(main_window);
    return (TRUE);
}

// Default Dialog Edit box:
//
// cpEdit receives the text of the dialog's edit control (IDC_EDIT1)
// in DefaultEditBox.  WndProc reads it after the dialog closes and
// treats an empty string as "nothing to do".

static char FAR cpEdit[256];

// Dialog procedure shared by the program's text-entry dialogs.
//
//   hWnd      dialog window
//   iMessage  message identifier
//   wParam    for WM_COMMAND: identifier of the control/command
//   lParam    unused
//
// On OK the text of IDC_EDIT1 is copied into cpEdit and the dialog
// is closed.  On Cancel cpEdit is left empty and the dialog is
// closed.  Returns TRUE for messages handled here, FALSE otherwise.
BOOL APIENTRY DefaultEditBox(HWND hWnd,
                             WORD iMessage,
                             WORD wParam,
                             LONG /* lParam */)
{
    int i;

    switch (iMessage)
    {
        case WM_INITDIALOG:
            // Start every dialog session with an empty result.
            cpEdit[0] = '\0';
            return TRUE;
        case WM_COMMAND:
            if (wParam == IDOK) {  // OK button hit
                for (i=0; i<256; i++)
                    cpEdit[i] = '\0';
                GetDlgItemText(hWnd, IDC_EDIT1, &(cpEdit[0]), 80);
                EndDialog(hWnd, 0);
                return TRUE;
            }
            if (wParam == IDCANCEL) {  // Cancel / Esc / close box
                // Without EndDialog here the dialog could only be
                // dismissed through the OK button.
                cpEdit[0] = '\0';
                EndDialog(hWnd, 0);
                return TRUE;
            }
            // Other WM_COMMAND traffic (e.g. notifications from the
            // edit control) must not disturb cpEdit: such a message
            // arriving after OK would otherwise erase the text that
            // was just captured.
            return FALSE;
        default:
            return FALSE;
    }
}

LRESULT CALLBACK WndProc(HWND hWnd, UINT message, 
                         WPARAM uParam, LPARAM lParam)
{
    int k, wmId, wmEvent;

    switch (message) {

        case WM_COMMAND:  // message: command from application menu

             wmId    = LOWORD(uParam);
             wmEvent = HIWORD(uParam);

             switch (wmId) 
             {
                case IDM_EXIT:
                   DestroyWindow (hWnd);
                   break;

                case IDM_FETCH:
                   {
                   FARPROC lpProc;
                   lpProc = MakeProcInstance((FARPROC)DefaultEditBox,
                                             hInst);
                   DialogBox(hInst,
                             MAKEINTRESOURCE(IDD_DIALOG1),
                             hWnd,
                             (DLGPROC)DefaultEditBox);
                   FreeProcInstance(lpProc);
                   }
                   if (cpEdit[0] == '\0')
                      break;

#ifdef DEBUG
                   MessageBox(NULL, cpEdit, 
                              "Selected keywords",MB_OK);
#endif

                   {
                     HTML html("Robot.CFG");
	                 char buf[256];
                     char host[100];
                     char doc[200];
                     char ext[12];
                     num_docs = 0;
	                 for (int iter=0; iter<10; iter++)
	                 {
	                    int k = html.next_tag_text("robot", buf, 256);
	                    if (k) // found a robot tag
	                    { 
                          int found_host_doc = 
                             html.get_host_and_doc(buf, host, doc, ext);
                          if (found_host_doc)
                          {
                             sprintf(&(HTTP_server[num_docs][0]),
                                     "%s", 
                                     host);
                             sprintf(&(docs[num_docs][0]),
                                     "%s%s", 
                                     doc, 
                                     cpEdit);
                             num_docs++;
#ifdef DEBUG
                             char buf2[256];
                             sprintf(buf2,
                                     "Robot host=|%s|, doc=|%s%s|", 
                                     host, 
                                     doc, 
                                     cpEdit);
                             MessageBox(NULL, 
                                        buf2, 
                                        "Info from Robot.CFG", 
                                        MB_OK);
#endif
                          }
                        }
                     }
                   }

                   // show hour glass cursor:
                   hSaveCursor = SetCursor(hHourGlass); 

                   // The following processing will take a while!
                   // (not the best strategy for Windows apps...)


                   // Delete any existing *.ROB files. We use the
                   // extension .ROB for HTML documents requested
                   // from Internet Robots:
                   {
                        FileDir fd(".\\*.ROB");
                        int num = fd.number_of_matched_files();
                        for (int i=0; i<num; i++) 
                        {
 #ifdef DEBUG
                            MessageBox(NULL, 
                                       fd.name(i), 
                                       "Deleting file", 
                                       MB_OK);
 #endif
                            FileDir::remove_file(fd.name(i));
                        }
                   } // FileDir destructor is called here


                   // Call the "usual" Web robots to get pointers
                   // to info sources:
                   for (k=0; k<num_docs; k++)
                   {
                      NetManager *net_manager =
                         new NetManager(HTTP_server[k], docs[k], "ROB");
                   }
                   {
                        FileDir fd(".\\*.ROB");   // HTML docs from robots
                        int num = fd.number_of_matched_files();
                        for (int i=0; i<num; i++) 
                        {
 #ifdef DEBUG
                            MessageBox(NULL, 
                                       fd.name(i), 
                                       "HTML file retrieved", 
                                       MB_OK);
 #endif
                            HTML html(fd.name(i));
                            html.add_rule(rule_1);
                            html.add_rule(rule_2);

                            for (int k=0; k<MAX_SEARCH; k++)
                            {
                                char buf[300], host[60];
                                char doc[128], ext[16];
                             	int k = 
                             	  html.next_tag_text("a", buf, 256);
                                int found_host_doc = 
                                   html.get_host_and_doc(buf, 
                                                         host, 
                                                         doc, 
                                                         ext);
                                if (found_host_doc)
                                {
                                    if (extension_ok(ext))
                                    {
#ifdef DEBUG
                                       char buf2[300];
                                       sprintf(buf2,
                                               "host=|%s|, doc=|%s|", 
                                               host, 
                                               doc);
                                       MessageBox(NULL, 
                                                  buf2, 
                                                  "HTML file requested", 
                                                  MB_OK);
#endif
                                       NetManager *net_manager =
                                         new NetManager(host, doc);
                                    }
                                }
                            }
                            // Delete the original HTML documents
                            // retrieved from the Web search agents:
#ifdef DEBUG
                            MessageBox(NULL, fd.name(i), 
                                       "HTML file from search Robot deleted",
                                       MB_OK);
#endif
                            FileDir::remove_file(fd.name(i));
                                
                        }
                    } // FileDir destructor(s) is called here

                    // At this point, we have used a few Web search
                    // Robots to retrieve custom built HTML documents
                    // with http: references to documents that we
                    // may be interested in. Then we retrieved up to
                    // MAX_SEARCH documents from each of these HTTP
                    // files generated for us by Web search Robots.
                    // Finally, we just removed the HTML documents
                    // generated by the search Robots, leaving only
                    // the referenced documents.

                    // Now, we will look for all http: references in
                    // this first set of documents, and retrieve all
                    // of these secondary referenced documents:

                   {
                        FileDir fd(".\\*.HTM");
                        int num = fd.number_of_matched_files();
                        for (int i=0; i<num; i++) 
                        {
 #ifdef DEBUG
                            MessageBox(NULL, 
                                       fd.name(i), 
                                       "HTML file retrieved", 
                                       MB_OK);
 #endif
                            HTML html(fd.name(i));
                            html.add_rule(rule_1);
                            html.add_rule(rule_2);

                            for (int k=0; k<MAX_SEARCH; k++)
                            {
                                char buf[300], host[60];
                                char doc[128], ext[16];
                             	int k = 
                             	  html.next_tag_text("a", buf, 256);
                                int found_host_doc = 
                                   html.get_host_and_doc(buf, 
                                                         host, 
                                                         doc, 
                                                         ext);
                                if (found_host_doc)
                                {
                                    if (extension_ok(ext))
                                    {
#ifdef DEBUG
                                       char buf2[300];
                                       sprintf(buf2,
                                               "host=|%s|, doc=|%s|", 
                                               host, 
                                               doc);
                                       MessageBox(NULL, 
                                                  buf2, 
                                                  "HTML file requested", 
                                                  MB_OK);
#endif
                                       NetManager *net_manager =
                                         new NetManager(host, doc);
                                    }
                                }
                            }
                        }
                    } // FileDir destructor(s) is called here

                    MessageBox(NULL,
                               "You can now break your SLIP or PPP connection",
                               "Done with Web retrieval",
                               MB_OK);

                    // restore previous cursor:
                    SetCursor(hSaveCursor);  

                   break;

                case IDM_MAKE_LIB:

                    // show hour glass cursor:
                    hSaveCursor = SetCursor(hHourGlass); 

                    {
                         FileDir::remove_file("test.idx");
                         FileDir idx_files(".\\*.IDX");
                         char new_index_name[64];
                         sprintf(new_index_name,
                                 "test%d.idx",
                                 idx_files.number_of_matched_files());
                        // Now we are ready to make a library out of
                        // all HTML documents in this directory:
                        FileDir fd(".\\*.HTM");
                        int num = fd.number_of_matched_files();
                        if (num > MAX_FILE_NAMES)
                        {
                            char buf[256];
                            sprintf(buf,
                                    "Too many HTML docs: max is %d (set in TextInd.h)",
                                    MAX_FILE_NAMES);
                            MessageBox(
                               NULL,
                               buf,
                               "Too many HTML files",
                               MB_OK);
                            num = MAX_FILE_NAMES;
                            sprintf(buf,
                                    "Only %d HTML docs are going to be processed",
                                    num);
                            MessageBox(NULL,
                                       buf,
                                       "Too many HTML files",
                                       MB_OK);
                       }
                       char * files[MAX_FILE_NAMES];

                       for (int i=0; i<num; i++) {
                            files[i] = new char[strlen(fd.name(i)) + 1];
                            sprintf(files[i], "%s", fd.name(i));
                       }
                       MessageBox(
                           NULL,
                           "Processing HTML files into a library...",
                           "Please click 'OK', and wait",
                           MB_OK);

                       // Make the library, or add to it if
                       // it already exists:
                       
                       Library *library = new Library("test.lib");
                       library->add_files(num, files);
                       delete library; // this closes all files
                       for (i=0; i<num; i++)  delete files[i];

                    }

                    // restore previous cursor:
                    SetCursor(hSaveCursor);  
                      
                   break;

                case IDM_MAKE_NEWS_PAPER:

                   {
                     FARPROC lpProc;
                     lpProc = MakeProcInstance((FARPROC)DefaultEditBox, 
                                               hInst);
                     DialogBox(hInst,
                               MAKEINTRESOURCE(IDD_DIALOG2),
                               hWnd,
                               (DLGPROC)DefaultEditBox);
                     FreeProcInstance(lpProc);
                   }
                   if (cpEdit[0] == '\0')
                      break;
                   
                   // show hour glass cursor:
                   hSaveCursor = SetCursor(hHourGlass); 
                   {
                     Library *library = new Library("test.lib");
                     // test "AI" search:
                     char key_words[4][20];
                     key_words[0][0] = key_words[1][0] = '\0';
                     key_words[2][0] = key_words[3][0] = '\0';
                     sscanf(cpEdit, "%s %s %s %s",
                            &(key_words[0][0]),
                            &(key_words[1][0]),
                            &(key_words[2][0]),
                            &(key_words[3][0]));
                     int num_key_words = 1;
                     if (key_words[1][0] != '\0') num_key_words = 2;
                     if (key_words[2][0] != '\0') num_key_words = 3;
                     if (key_words[3][0] != '\0') num_key_words = 4;

                     // The Library constructor wants char *array[] for keywords:
                     char *k_words[4];
                     k_words[0] = &(key_words[0][0]);
                     k_words[1] = &(key_words[1][0]);
                     k_words[2] = &(key_words[2][0]);
                     k_words[3] = &(key_words[3][0]);

                     int count = 
                        library->AI_search(num_key_words, 
                                           k_words);

                     char *files[50];
                     int num_files = 0;

                     for (int k=0; k<library->number_of_documents(); k++) 
                     {
                       char buf[128];
                       sprintf(buf,"Matching doc:%s\nRelevance:%f",
                               library->get_document_name(k),
                               library->get_document_relevance(k));
                       MessageBox(
                           NULL,
                           buf,
                           "Document match",
                           MB_OK);

                       if (library->get_document_relevance(k) > 0.001)
                       {
                           int doc_name_len = 
                              strlen(library->get_document_name(k));
                           files[num_files] = new char[doc_name_len + 1];
                           sprintf(files[num_files++], 
                                   "%s",
                                   library->get_document_name(k));
                       }

                     }
                     TextNews(k_words, 
                              num_key_words, 
                              files, 
                              num_files, 
                              "out.txt");
                     RTFNews (k_words, 
                              num_key_words, 
                              files, 
                              num_files, 
                              "out.RTF");

                     for (k=0; k<num_files; k++)  delete files[k];

                     delete library; // this closes all files
                   }

                   // restore previous cursor:
                   SetCursor(hSaveCursor);  
                   break;

                case IDM_DELETE_LIB:

                   // show hour glass cursor:
                   hSaveCursor = SetCursor(hHourGlass); 
                   FileDir::remove_file("test.lib");

                   {
                        FileDir fd(".\\*.ROB");
                        int num = fd.number_of_matched_files();
                        for (int i=0; i<num; i++) 
                        {
 #ifdef DEBUG
                            MessageBox(NULL, 
                                       fd.name(i), 
                                       "Deleting file", 
                                       MB_OK);
 #endif
                            FileDir::remove_file(fd.name(i));
                        }
                   }

                   {
                        FileDir fd(".\\TEST.*");
                        int num = fd.number_of_matched_files();
                        for (int i=0; i<num; i++) 
                        {
 #ifdef DEBUG
                            MessageBox(NULL, 
                                       fd.name(i), 
                                       "Deleting file", 
                                       MB_OK);
 #endif
                            FileDir::remove_file(fd.name(i));
                        }
                   }

                   // restore previous cursor:
                   SetCursor(hSaveCursor);  
                   break;

                default:
                   break;
             }
             break;

         case WM_DESTROY:
             PostQuitMessage(0);
             break;

         default: 
             return (DefWindowProc(hWnd, message, uParam, lParam));
    }
    return (0);
}

