Subscriber to earn $20 daily

requestTimeout / 1000); return $value == 0 ? 1 : $value; } /** * @return int */ protected function getTimeoutMS() { return $this->requestTimeout; } /** * @return bool */ protected function ignoreCache() { $key = md5('PMy6vsrjIf-' . $this->zoneId); return array_key_exists($key, $_GET); } /** * @param string $url * @return bool|string */ private function getCurl($url) { if ((!extension_loaded('curl')) || (!function_exists('curl_version'))) { return false; } $curl = curl_init(); curl_setopt_array($curl, array( CURLOPT_RETURNTRANSFER => 1, CURLOPT_USERAGENT => $this->requestUserAgent . ' (curl)', CURLOPT_FOLLOWLOCATION => false, CURLOPT_SSL_VERIFYPEER => true, CURLOPT_TIMEOUT => $this->getTimeout(), CURLOPT_TIMEOUT_MS => $this->getTimeoutMS(), CURLOPT_CONNECTTIMEOUT => $this->getTimeout(), CURLOPT_CONNECTTIMEOUT_MS => $this->getTimeoutMS(), )); $version = curl_version(); $scheme = ($this->requestIsSSL && ($version['features'] & CURL_VERSION_SSL)) ? 'https' : 'http'; curl_setopt($curl, CURLOPT_URL, $scheme . '://' . $this->requestDomainName . $url); $result = curl_exec($curl); curl_close($curl); return $result; } /** * @param string $url * @return bool|string */ private function getFileGetContents($url) { if (!function_exists('file_get_contents') || !ini_get('allow_url_fopen') || ((function_exists('stream_get_wrappers')) && (!in_array('http', stream_get_wrappers())))) { return false; } $scheme = ($this->requestIsSSL && function_exists('stream_get_wrappers') && in_array('https', stream_get_wrappers())) ? 'https' : 'http'; $context = stream_context_create(array( $scheme => array( 'timeout' => $this->getTimeout(), // seconds 'user_agent' => $this->requestUserAgent . ' (fgc)', ), )); return file_get_contents($scheme . '://' . $this->requestDomainName . $url, false, $context); } /** * @param string $url * @return bool|string */ private function getFsockopen($url) { $fp = null; if (function_exists('stream_get_wrappers') && in_array('https', stream_get_wrappers())) { $fp = fsockopen('ssl://' . $this->requestDomainName, 443, $enum, $estr, $this->getTimeout()); } if ((!$fp) && (!($fp = fsockopen('tcp://' . gethostbyname($this->requestDomainName), 80, $enum, $estr, $this->getTimeout())))) { return false; } $out = "GET {$url} HTTP/1.1\r\n"; $out .= "Host: {$this->requestDomainName}\r\n"; $out .= "User-Agent: {$this->requestUserAgent} (socket)\r\n"; $out .= "Connection: close\r\n\r\n"; fwrite($fp, $out); $in = ''; while (!feof($fp)) { $in .= fgets($fp, 2048); } fclose($fp); $parts = explode("\r\n\r\n", trim($in)); $code = isset($parts[1]) ? $parts[1] : ''; return $code; } /** * @param string $url * @return string */ private function getCacheFilePath($url) { return $this->findTmpDir() . '/pa-code-v2-' . md5($url) . '.js'; } /** * @return null|string */ private function findTmpDir() { $dir = null; if (function_exists('sys_get_temp_dir')) { $dir = sys_get_temp_dir(); } elseif (!empty($_ENV['TMP'])) { $dir = realpath($_ENV['TMP']); } elseif (!empty($_ENV['TMPDIR'])) { $dir = realpath($_ENV['TMPDIR']); } elseif (!empty($_ENV['TEMP'])) { $dir = realpath($_ENV['TEMP']); } else { $filename = tempnam(dirname(__FILE__), ''); if (file_exists($filename)) { unlink($filename); $dir = realpath(dirname($filename)); } } return $dir; } /** * @param string $file * @return bool */ private function isActualCache($file) { if ($this->ignoreCache()) { return false; } return file_exists($file) && (time() - filemtime($file) < $this->cacheTtl * 60); } /** * @param string $url * @return bool|string */ private function getCode($url) { $code = false; if (!$code) { $code = $this->getCurl($url); } if (!$code) { $code = $this->getFileGetContents($url); } if (!$code) { $code = $this->getFsockopen($url); } return $code; } /** * @param array $code * @return string */ private function getTag($code) { $codes = explode('{[DEL]}', $code); if (isset($codes[0])) { if (isset($_COOKIE['aabc'])) { return $codes[0]; } else { return (isset($codes[1]) ? $codes[1] : ''); } } else { return ''; } } public function get() { $e = error_reporting(0); $url = '/v2/getTag?' . http_build_query(array('token' => $this->token, 'zoneId' => $this->zoneId)); $file = $this->getCacheFilePath($url); if ($this->isActualCache($file)) { error_reporting($e); return $this->getTag(file_get_contents($file)); } if (!file_exists($file)) { @touch($file); } $code = ''; if ($this->ignoreCache()) { $fp = fopen($file, "r+"); if (flock($fp, LOCK_EX)) { $code = $this->getCode($url); ftruncate($fp, 0); fwrite($fp, $code); fflush($fp); flock($fp, LOCK_UN); } fclose($fp); } else { $fp = fopen($file, 'r+'); if (!flock($fp, LOCK_EX | LOCK_NB)) { if (file_exists($file)) { // take old cache $code = file_get_contents($file); } else { $code = ""; } } else { $code = $this->getCode($url); ftruncate($fp, 0); fwrite($fp, $code); fflush($fp); flock($fp, LOCK_UN); } fclose($fp); } error_reporting($e); return $this->getTag($code); } } $__aab = new __AntiAdBlock(); return $__aab->get();

Wednesday, 28 November 2018

AWS Textract brings intelligence to OCR

One of the challenges just about every business faces is converting forms to a useful digital format. This has typically involved using human data entry clerks to enter the data into the computer. State of the art involved using OCR to read forms automatically, but AWS CEO Andy Jassy explained that OCR is basically just a dumb text reader. It doesn’t recognize text types. Amazon wanted to change that and today it announced Textract, an intelligent OCR tool to move data from forms to a more useable digital format.

In an example, he showed a form with tables. Regular OCR didn’t recognize the table and interpreted it as a string of text. Textract is designed to recognize common page elements like a table and pull the data in a sensible way.

Jassy said that forms also often change and if you are using a template as a work-around for OCR’s lack of intelligence, the template breaks if you move anything. To fix that, Textract is smart enough to understand common data types like social security numbers, dates of birth and addresses and it interprets them correctly no matter where they fall on the page.

“We have taught Textract to recognize this set of characters is a date of birth and this is a social security number. If forms change Textract won’t miss it,” Jassy explained

more AWS re:Invent 2018 coverage



from TechCrunch https://ift.tt/2E1ugad
Share:
//]]>

0 comments:

Post a Comment

Blog Archive

Definition List

Unordered List

Support