Java: Simple Parser for Apache HTTP Server access.log

From Wiki
Revision as of 21:27, 24 August 2015 by Ebasso (talk | contribs)

Parser para o access.log do Apache HTTP Server

Se o formato do seu arquivo for RSYSLOG, basta alterar a variável accessLogType de apache para rsyslog.

English: Simple Parser for Apache HTTP Server access.log

If your log file is in rsyslog format, change the variable accessLogType from "apache" to "rsyslog".

Código / Code

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal command-line parser for Apache HTTP Server access logs.
 *
 * <p>Reads the file named by {@code fileAccesslog} line by line, matches each
 * line against a regular expression selected by {@code accessLogType}
 * ({@code "apache"} for the combined log format, {@code "rsyslog"} for
 * entries carrying an rsyslog prefix) and prints the request time, the
 * request line and the HTTP status code of every line that matches. Lines
 * that do not match are reported and skipped.
 *
 * @author ebasso
 */
public class SimpleParserApacheLog {

    private static String fileAccesslog = "/var/logs/apache/access.log";
    private static String accessLogType = "apache";   // For Apache combined
    //private static String accessLogType = "rsyslog";   // In case of rsyslog

    // Capture-group indices of the fields that get printed. They depend on the
    // log type and are assigned by getAccessLogRegex().
    private static int requestTimeGroup = 0;
    private static int clientRequestGroup = 0;
    private static int httpStatusCodeGroup = 0;

    // Compiled lazily on the first parsed line and reused for all later lines.
    private Pattern accessLogPattern;

    /**
     * Entry point: parses the configured access log and prints one summary
     * line per entry. Any exception is printed to standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            System.out.println("SimpleParserApacheLog: start");
            SimpleParserApacheLog myApp = new SimpleParserApacheLog();

            // Read and parse the whole access log.
            myApp.processFileAccessLog();

            System.out.println("SimpleParserApacheLog: end");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code fileAccesslog} line by line, hands every line to
     * {@link #processLine(String)} and finally prints the number of lines read
     * (parsed or not). A missing file is reported on standard output; other
     * I/O errors are printed as stack traces.
     *
     * @throws Exception if the log type is unsupported (propagated from
     *         {@link #processLine(String)})
     */
    private void processFileAccessLog() throws Exception {
        System.out.println("Read file [" + fileAccesslog + "]");

        BufferedReader reader = null;
        int countLines = 0;
        try {
            // No charset is given, so the platform default charset is used.
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(fileAccesslog))));

            String lineInput;
            while ((lineInput = reader.readLine()) != null) {
                processLine(lineInput);
                countLines++;
            }
            System.out.println("Total Lines: [" + countLines + "]");
        } catch (FileNotFoundException e) {
            System.out.println("Arquivo [" + fileAccesslog + "] não existe");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Closing the reader also closes the wrapped file stream.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Parses one log line and prints {@code [time; request; status]}.
     * A line that does not match the expected format is echoed together with
     * a "couldn't be parsed" notice and otherwise ignored, so that a single
     * malformed entry does not abort the whole run.
     *
     * @param lineInput one raw line of the access log
     * @throws Exception if the log type is unsupported
     */
    private void processLine(String lineInput) throws Exception {
        Matcher entry = getAccessLogPattern().matcher(lineInput);
        if (!entry.matches()) {
            System.out.println(lineInput + " : couldn't be parsed");
            // Matcher.group() throws IllegalStateException without a successful match.
            return;
        }

        System.out.println("["
                + entry.group(requestTimeGroup) + "; "
                + entry.group(clientRequestGroup) + "; "
                + entry.group(httpStatusCodeGroup)
                + "]");
    }

    /**
     * Returns the compiled access-log pattern, compiling it on first use so
     * the regular expression is not rebuilt for every line.
     *
     * @return the pattern for the configured log type
     * @throws Exception if the log type is unsupported
     */
    private Pattern getAccessLogPattern() throws Exception {
        if (accessLogPattern == null) {
            accessLogPattern = Pattern.compile(getAccessLogRegex(),
                    Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
        }
        return accessLogPattern;
    }

    /**
     * Builds the regular expression for the configured {@code accessLogType}
     * and, as a side effect, sets the capture-group indices of the request
     * time, request line and status code for that format.
     *
     * @return the regular expression matching one complete log line
     * @throws IllegalStateException if {@code accessLogType} is neither
     *         {@code "apache"} nor {@code "rsyslog"}
     */
    private String getAccessLogRegex() throws Exception {
        if (accessLogType.equals("apache")) {
            // 172.18.10.37 - - [12/Aug/2015:07:18:45 -0300] "GET
            String regex1 = "^([\\d.]+)"; // Client IP
            String regex2 = " (\\S+)"; // -
            String regex3 = " (\\S+)"; // -
            String regex4 = " \\[([\\w:/]+\\s[+\\-]\\d{4})\\]"; // Date
            String regex5 = " \"(.+?)\""; // request method and url
            String regex6 = " (\\d{3})"; // HTTP code
            String regex7 = " (\\d+|(.+?))"; // Number of bytes
            String regex8 = " \"([^\"]+|(.+?))\""; // Referer
            String regex9 = " \"([^\"]+|(.+?))\""; // Agent

            requestTimeGroup = 4;
            clientRequestGroup = 5;
            httpStatusCodeGroup = 6;

            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6 + regex7 + regex8 + regex9;
        }

        if (accessLogType.equals("rsyslog")) {
            // Aug 11 03:33:50 server1 servidoreshttp: 186.204.167.161 - user@example.com - "POST
            // Syslog timestamps pad single-digit days with a space ("Aug  1"),
            // hence \s+ and \d{1,2} instead of a single space and two digits.
            String regex1 = "^(\\S+\\s+\\d{1,2} \\S+)"; // Date
            String regex2 = " (\\S+)"; // server1:
            String regex3 = " (\\S+)"; // servidoreshttp:
            String regex4 = " ([\\d.]+)"; // Client IP
            String regex5 = " (\\S+)"; // -
            String regex6 = " (\\S+)"; // user
            String regex7 = " (\\S+)"; // -
            String regex8 = " \"(.+?)\""; // request method and url
            String regex9 = " (\\d{3})"; // HTTP code
            String regex10 = " (\\d+|(.+?))"; // Number of bytes
            String regex11 = " \"([^\"]+|(.+?))\""; // Referer
            String regex12 = " \"([^\"]+|(.+?))\""; // Agent

            requestTimeGroup = 1;
            clientRequestGroup = 8;
            httpStatusCodeGroup = 9;

            return regex1 + regex2 + regex3 + regex4 + regex5 + regex6 + regex7 + regex8 + regex9
                    + regex10 + regex11 + regex12;
        }

        // An empty pattern would silently reject every line; fail loudly instead.
        throw new IllegalStateException("Unsupported accessLogType [" + accessLogType + "]");
    }
}

Running

java SimpleParserApacheLog
SimpleParserApacheLog: start
Read file [/var/logs/apache/access.log]
[Aug 11 03:33:50; GET /foo.html HTTP/1.1; 200]
[Aug 11 03:33:50; POST /[email protected]&DeviceId=RP3NVFADFADFADFADFDSAFAFA8 HTTP/1.1; 200]
...
[Aug 11 03:34:50; POST /index.php?action=update&deviceId=Android_f570dlkjadlfncc0 HTTP/1.1; 200]
Total Lines: [10]
SimpleParserApacheLog: end

Ver também